path: root/src/shader_recompiler
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--  src/shader_recompiler/CMakeLists.txt  268
-rw-r--r--  src/shader_recompiler/backend/bindings.h  19
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_context.cpp  154
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_context.h  80
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm.cpp  492
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm.h  25
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp  91
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp  244
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp  346
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp  231
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp  414
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_image.cpp  850
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_instructions.h  625
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp  294
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp  568
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp  273
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_select.cpp  67
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp  58
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_special.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp  0
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp  150
-rw-r--r--  src/shader_recompiler/backend/glasm/reg_alloc.cpp  186
-rw-r--r--  src/shader_recompiler/backend/glasm/reg_alloc.h  303
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_context.cpp  715
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_context.h  174
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl.cpp  252
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl.h  24
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp  418
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp  21
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp  94
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp  219
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp  456
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp  21
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp  230
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp  456
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_image.cpp  799
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_instructions.h  702
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp  253
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp  28
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp  202
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp  105
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_select.cpp  55
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp  79
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_special.cpp  111
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp  32
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp  217
-rw-r--r--  src/shader_recompiler/backend/glsl/var_alloc.cpp  308
-rw-r--r--  src/shader_recompiler/backend/glsl/var_alloc.h  105
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_context.cpp  1368
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_context.h  307
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.cpp  541
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.h  27
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp  448
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp  38
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp  66
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp  155
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp  505
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp  28
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp  269
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp  396
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_image.cpp  462
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp  183
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_instructions.h  579
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp  270
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp  26
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp  275
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_select.cpp  42
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp  174
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_special.cpp  150
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp  30
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp  203
-rw-r--r--  src/shader_recompiler/environment.h  53
-rw-r--r--  src/shader_recompiler/exception.h  66
-rw-r--r--  src/shader_recompiler/frontend/ir/abstract_syntax_list.h  58
-rw-r--r--  src/shader_recompiler/frontend/ir/attribute.cpp  454
-rw-r--r--  src/shader_recompiler/frontend/ir/attribute.h  250
-rw-r--r--  src/shader_recompiler/frontend/ir/basic_block.cpp  149
-rw-r--r--  src/shader_recompiler/frontend/ir/basic_block.h  185
-rw-r--r--  src/shader_recompiler/frontend/ir/breadth_first_search.h  56
-rw-r--r--  src/shader_recompiler/frontend/ir/condition.cpp  29
-rw-r--r--  src/shader_recompiler/frontend/ir/condition.h  60
-rw-r--r--  src/shader_recompiler/frontend/ir/flow_test.cpp  83
-rw-r--r--  src/shader_recompiler/frontend/ir/flow_test.h  62
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.cpp  2017
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.h  413
-rw-r--r--  src/shader_recompiler/frontend/ir/microinstruction.cpp  411
-rw-r--r--  src/shader_recompiler/frontend/ir/modifiers.h  49
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.cpp  15
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.h  110
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.inc  550
-rw-r--r--  src/shader_recompiler/frontend/ir/patch.cpp  28
-rw-r--r--  src/shader_recompiler/frontend/ir/patch.h  149
-rw-r--r--  src/shader_recompiler/frontend/ir/post_order.cpp  46
-rw-r--r--  src/shader_recompiler/frontend/ir/post_order.h  14
-rw-r--r--  src/shader_recompiler/frontend/ir/pred.h  44
-rw-r--r--  src/shader_recompiler/frontend/ir/program.cpp  32
-rw-r--r--  src/shader_recompiler/frontend/ir/program.h  35
-rw-r--r--  src/shader_recompiler/frontend/ir/reg.h  332
-rw-r--r--  src/shader_recompiler/frontend/ir/type.cpp  38
-rw-r--r--  src/shader_recompiler/frontend/ir/type.h  61
-rw-r--r--  src/shader_recompiler/frontend/ir/value.cpp  99
-rw-r--r--  src/shader_recompiler/frontend/ir/value.h  398
-rw-r--r--  src/shader_recompiler/frontend/maxwell/control_flow.cpp  642
-rw-r--r--  src/shader_recompiler/frontend/maxwell/control_flow.h  169
-rw-r--r--  src/shader_recompiler/frontend/maxwell/decode.cpp  149
-rw-r--r--  src/shader_recompiler/frontend/maxwell/decode.h  14
-rw-r--r--  src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp  108
-rw-r--r--  src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h  28
-rw-r--r--  src/shader_recompiler/frontend/maxwell/instruction.h  63
-rw-r--r--  src/shader_recompiler/frontend/maxwell/location.h  112
-rw-r--r--  src/shader_recompiler/frontend/maxwell/maxwell.inc  286
-rw-r--r--  src/shader_recompiler/frontend/maxwell/opcodes.cpp  26
-rw-r--r--  src/shader_recompiler/frontend/maxwell/opcodes.h  30
-rw-r--r--  src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp  883
-rw-r--r--  src/shader_recompiler/frontend/maxwell/structured_control_flow.h  20
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp  214
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp  110
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp  35
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp  96
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp  74
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp  36
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h  57
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp  153
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h  28
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp  66
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp  55
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp  72
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp  58
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp  55
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp  50
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp  54
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp  43
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp  47
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp  82
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp  55
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp  78
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp  214
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp  253
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp  94
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp  71
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp  127
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp  41
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp  60
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp  44
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp  125
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp  169
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h  42
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp  143
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp  117
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp  118
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp  272
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/impl.h  387
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp  105
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp  122
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp  48
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp  80
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp  182
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp  82
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp  64
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp  36
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp  86
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp  58
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp  71
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp  66
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp  135
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp  126
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp  53
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h  39
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp  108
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp  196
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp  218
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp  184
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp  116
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp  122
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp  66
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp  44
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp  71
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp  181
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp  283
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp  45
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp  46
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp  38
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp  53
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp  44
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp  205
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp  281
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp  236
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp  266
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp  208
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp  134
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp  182
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp  165
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp  242
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp  131
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp  76
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp  30
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h  23
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp  92
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp  64
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp  92
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp  54
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp  69
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/translate.cpp  52
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/translate.h  14
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp  223
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.h  23
-rw-r--r--  src/shader_recompiler/host_translate_info.h  18
-rw-r--r--  src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp  928
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp  610
-rw-r--r--  src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp  26
-rw-r--r--  src/shader_recompiler/ir_opt/dual_vertex_pass.cpp  30
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp  526
-rw-r--r--  src/shader_recompiler/ir_opt/identity_removal_pass.cpp  38
-rw-r--r--  src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp  143
-rw-r--r--  src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp  218
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h  32
-rw-r--r--  src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  383
-rw-r--r--  src/shader_recompiler/ir_opt/texture_pass.cpp  523
-rw-r--r--  src/shader_recompiler/ir_opt/verification_pass.cpp  98
-rw-r--r--  src/shader_recompiler/object_pool.h  104
-rw-r--r--  src/shader_recompiler/profile.h  74
-rw-r--r--  src/shader_recompiler/program_header.h  219
-rw-r--r--  src/shader_recompiler/runtime_info.h  88
-rw-r--r--  src/shader_recompiler/shader_info.h  193
-rw-r--r--  src/shader_recompiler/stage.h  28
-rw-r--r--  src/shader_recompiler/varying_state.h  69
233 files changed, 41653 insertions, 0 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
new file mode 100644
index 000000000..b5b7e5e83
--- /dev/null
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -0,0 +1,268 @@
1add_library(shader_recompiler STATIC
2 backend/bindings.h
3 backend/glasm/emit_context.cpp
4 backend/glasm/emit_context.h
5 backend/glasm/emit_glasm.cpp
6 backend/glasm/emit_glasm.h
7 backend/glasm/emit_glasm_barriers.cpp
8 backend/glasm/emit_glasm_bitwise_conversion.cpp
9 backend/glasm/emit_glasm_composite.cpp
10 backend/glasm/emit_glasm_context_get_set.cpp
11 backend/glasm/emit_glasm_control_flow.cpp
12 backend/glasm/emit_glasm_convert.cpp
13 backend/glasm/emit_glasm_floating_point.cpp
14 backend/glasm/emit_glasm_image.cpp
15 backend/glasm/emit_glasm_instructions.h
16 backend/glasm/emit_glasm_integer.cpp
17 backend/glasm/emit_glasm_logical.cpp
18 backend/glasm/emit_glasm_memory.cpp
19 backend/glasm/emit_glasm_not_implemented.cpp
20 backend/glasm/emit_glasm_select.cpp
21 backend/glasm/emit_glasm_shared_memory.cpp
22 backend/glasm/emit_glasm_special.cpp
23 backend/glasm/emit_glasm_undefined.cpp
24 backend/glasm/emit_glasm_warp.cpp
25 backend/glasm/reg_alloc.cpp
26 backend/glasm/reg_alloc.h
27 backend/glsl/emit_context.cpp
28 backend/glsl/emit_context.h
29 backend/glsl/emit_glsl.cpp
30 backend/glsl/emit_glsl.h
31 backend/glsl/emit_glsl_atomic.cpp
32 backend/glsl/emit_glsl_barriers.cpp
33 backend/glsl/emit_glsl_bitwise_conversion.cpp
34 backend/glsl/emit_glsl_composite.cpp
35 backend/glsl/emit_glsl_context_get_set.cpp
36 backend/glsl/emit_glsl_control_flow.cpp
37 backend/glsl/emit_glsl_convert.cpp
38 backend/glsl/emit_glsl_floating_point.cpp
39 backend/glsl/emit_glsl_image.cpp
40 backend/glsl/emit_glsl_instructions.h
41 backend/glsl/emit_glsl_integer.cpp
42 backend/glsl/emit_glsl_logical.cpp
43 backend/glsl/emit_glsl_memory.cpp
44 backend/glsl/emit_glsl_not_implemented.cpp
45 backend/glsl/emit_glsl_select.cpp
46 backend/glsl/emit_glsl_shared_memory.cpp
47 backend/glsl/emit_glsl_special.cpp
48 backend/glsl/emit_glsl_undefined.cpp
49 backend/glsl/emit_glsl_warp.cpp
50 backend/glsl/var_alloc.cpp
51 backend/glsl/var_alloc.h
52 backend/spirv/emit_context.cpp
53 backend/spirv/emit_context.h
54 backend/spirv/emit_spirv.cpp
55 backend/spirv/emit_spirv.h
56 backend/spirv/emit_spirv_atomic.cpp
57 backend/spirv/emit_spirv_barriers.cpp
58 backend/spirv/emit_spirv_bitwise_conversion.cpp
59 backend/spirv/emit_spirv_composite.cpp
60 backend/spirv/emit_spirv_context_get_set.cpp
61 backend/spirv/emit_spirv_control_flow.cpp
62 backend/spirv/emit_spirv_convert.cpp
63 backend/spirv/emit_spirv_floating_point.cpp
64 backend/spirv/emit_spirv_image.cpp
65 backend/spirv/emit_spirv_image_atomic.cpp
66 backend/spirv/emit_spirv_instructions.h
67 backend/spirv/emit_spirv_integer.cpp
68 backend/spirv/emit_spirv_logical.cpp
69 backend/spirv/emit_spirv_memory.cpp
70 backend/spirv/emit_spirv_select.cpp
71 backend/spirv/emit_spirv_shared_memory.cpp
72 backend/spirv/emit_spirv_special.cpp
73 backend/spirv/emit_spirv_undefined.cpp
74 backend/spirv/emit_spirv_warp.cpp
75 environment.h
76 exception.h
77 frontend/ir/abstract_syntax_list.h
78 frontend/ir/attribute.cpp
79 frontend/ir/attribute.h
80 frontend/ir/basic_block.cpp
81 frontend/ir/basic_block.h
82 frontend/ir/breadth_first_search.h
83 frontend/ir/condition.cpp
84 frontend/ir/condition.h
85 frontend/ir/flow_test.cpp
86 frontend/ir/flow_test.h
87 frontend/ir/ir_emitter.cpp
88 frontend/ir/ir_emitter.h
89 frontend/ir/microinstruction.cpp
90 frontend/ir/modifiers.h
91 frontend/ir/opcodes.cpp
92 frontend/ir/opcodes.h
93 frontend/ir/opcodes.inc
94 frontend/ir/patch.cpp
95 frontend/ir/patch.h
96 frontend/ir/post_order.cpp
97 frontend/ir/post_order.h
98 frontend/ir/pred.h
99 frontend/ir/program.cpp
100 frontend/ir/program.h
101 frontend/ir/reg.h
102 frontend/ir/type.cpp
103 frontend/ir/type.h
104 frontend/ir/value.cpp
105 frontend/ir/value.h
106 frontend/maxwell/control_flow.cpp
107 frontend/maxwell/control_flow.h
108 frontend/maxwell/decode.cpp
109 frontend/maxwell/decode.h
110 frontend/maxwell/indirect_branch_table_track.cpp
111 frontend/maxwell/indirect_branch_table_track.h
112 frontend/maxwell/instruction.h
113 frontend/maxwell/location.h
114 frontend/maxwell/maxwell.inc
115 frontend/maxwell/opcodes.cpp
116 frontend/maxwell/opcodes.h
117 frontend/maxwell/structured_control_flow.cpp
118 frontend/maxwell/structured_control_flow.h
119 frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
120 frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
121 frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
122 frontend/maxwell/translate/impl/barrier_operations.cpp
123 frontend/maxwell/translate/impl/bitfield_extract.cpp
124 frontend/maxwell/translate/impl/bitfield_insert.cpp
125 frontend/maxwell/translate/impl/branch_indirect.cpp
126 frontend/maxwell/translate/impl/common_encoding.h
127 frontend/maxwell/translate/impl/common_funcs.cpp
128 frontend/maxwell/translate/impl/common_funcs.h
129 frontend/maxwell/translate/impl/condition_code_set.cpp
130 frontend/maxwell/translate/impl/double_add.cpp
131 frontend/maxwell/translate/impl/double_compare_and_set.cpp
132 frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
133 frontend/maxwell/translate/impl/double_min_max.cpp
134 frontend/maxwell/translate/impl/double_multiply.cpp
135 frontend/maxwell/translate/impl/double_set_predicate.cpp
136 frontend/maxwell/translate/impl/exit_program.cpp
137 frontend/maxwell/translate/impl/find_leading_one.cpp
138 frontend/maxwell/translate/impl/floating_point_add.cpp
139 frontend/maxwell/translate/impl/floating_point_compare.cpp
140 frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
141 frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
142 frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
143 frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
144 frontend/maxwell/translate/impl/floating_point_min_max.cpp
145 frontend/maxwell/translate/impl/floating_point_multi_function.cpp
146 frontend/maxwell/translate/impl/floating_point_multiply.cpp
147 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
148 frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
149 frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
150 frontend/maxwell/translate/impl/half_floating_point_add.cpp
151 frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
152 frontend/maxwell/translate/impl/half_floating_point_helper.cpp
153 frontend/maxwell/translate/impl/half_floating_point_helper.h
154 frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
155 frontend/maxwell/translate/impl/half_floating_point_set.cpp
156 frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
157 frontend/maxwell/translate/impl/impl.cpp
158 frontend/maxwell/translate/impl/impl.h
159 frontend/maxwell/translate/impl/integer_add.cpp
160 frontend/maxwell/translate/impl/integer_add_three_input.cpp
161 frontend/maxwell/translate/impl/integer_compare.cpp
162 frontend/maxwell/translate/impl/integer_compare_and_set.cpp
163 frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
164 frontend/maxwell/translate/impl/integer_funnel_shift.cpp
165 frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
166 frontend/maxwell/translate/impl/integer_popcount.cpp
167 frontend/maxwell/translate/impl/integer_scaled_add.cpp
168 frontend/maxwell/translate/impl/integer_set_predicate.cpp
169 frontend/maxwell/translate/impl/integer_shift_left.cpp
170 frontend/maxwell/translate/impl/integer_shift_right.cpp
171 frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
172 frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
173 frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
174 frontend/maxwell/translate/impl/load_constant.cpp
175 frontend/maxwell/translate/impl/load_constant.h
176 frontend/maxwell/translate/impl/load_effective_address.cpp
177 frontend/maxwell/translate/impl/load_store_attribute.cpp
178 frontend/maxwell/translate/impl/load_store_local_shared.cpp
179 frontend/maxwell/translate/impl/load_store_memory.cpp
180 frontend/maxwell/translate/impl/logic_operation.cpp
181 frontend/maxwell/translate/impl/logic_operation_three_input.cpp
182 frontend/maxwell/translate/impl/move_predicate_to_register.cpp
183 frontend/maxwell/translate/impl/move_register.cpp
184 frontend/maxwell/translate/impl/move_register_to_predicate.cpp
185 frontend/maxwell/translate/impl/move_special_register.cpp
186 frontend/maxwell/translate/impl/not_implemented.cpp
187 frontend/maxwell/translate/impl/output_geometry.cpp
188 frontend/maxwell/translate/impl/pixel_load.cpp
189 frontend/maxwell/translate/impl/predicate_set_predicate.cpp
190 frontend/maxwell/translate/impl/predicate_set_register.cpp
191 frontend/maxwell/translate/impl/select_source_with_predicate.cpp
192 frontend/maxwell/translate/impl/surface_atomic_operations.cpp
193 frontend/maxwell/translate/impl/surface_load_store.cpp
194 frontend/maxwell/translate/impl/texture_fetch.cpp
195 frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
196 frontend/maxwell/translate/impl/texture_gather.cpp
197 frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
198 frontend/maxwell/translate/impl/texture_gradient.cpp
199 frontend/maxwell/translate/impl/texture_load.cpp
200 frontend/maxwell/translate/impl/texture_load_swizzled.cpp
201 frontend/maxwell/translate/impl/texture_mipmap_level.cpp
202 frontend/maxwell/translate/impl/texture_query.cpp
203 frontend/maxwell/translate/impl/video_helper.cpp
204 frontend/maxwell/translate/impl/video_helper.h
205 frontend/maxwell/translate/impl/video_minimum_maximum.cpp
206 frontend/maxwell/translate/impl/video_multiply_add.cpp
207 frontend/maxwell/translate/impl/video_set_predicate.cpp
208 frontend/maxwell/translate/impl/vote.cpp
209 frontend/maxwell/translate/impl/warp_shuffle.cpp
210 frontend/maxwell/translate/translate.cpp
211 frontend/maxwell/translate/translate.h
212 frontend/maxwell/translate_program.cpp
213 frontend/maxwell/translate_program.h
214 host_translate_info.h
215 ir_opt/collect_shader_info_pass.cpp
216 ir_opt/constant_propagation_pass.cpp
217 ir_opt/dead_code_elimination_pass.cpp
218 ir_opt/dual_vertex_pass.cpp
219 ir_opt/global_memory_to_storage_buffer_pass.cpp
220 ir_opt/identity_removal_pass.cpp
221 ir_opt/lower_fp16_to_fp32.cpp
222 ir_opt/lower_int64_to_int32.cpp
223 ir_opt/passes.h
224 ir_opt/ssa_rewrite_pass.cpp
225 ir_opt/texture_pass.cpp
226 ir_opt/verification_pass.cpp
227 object_pool.h
228 profile.h
229 program_header.h
230 runtime_info.h
231 shader_info.h
232 varying_state.h
233)
234
235target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
236
237if (MSVC)
238 target_compile_options(shader_recompiler PRIVATE
239 /W4
240 /WX
241 /we4018 # 'expression' : signed/unsigned mismatch
242 /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
243 /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
244 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
245 /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
246 /we4305 # 'context' : truncation from 'type1' to 'type2'
247 /we4800 # Implicit conversion from 'type' to bool. Possible information loss
248 /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior.
249 )
250else()
251 target_compile_options(shader_recompiler PRIVATE
252 -Werror
253 -Werror=conversion
254 -Werror=ignored-qualifiers
255 -Werror=implicit-fallthrough
256 -Werror=shadow
257 -Werror=sign-compare
258 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
259 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
260 -Werror=unused-variable
261
262 # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6.
263 # And this in turn limits the size of a std::array.
264 $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
265 )
266endif()
267
268create_target_directory_groups(shader_recompiler)
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
new file mode 100644
index 000000000..35503000c
--- /dev/null
+++ b/src/shader_recompiler/backend/bindings.h
@@ -0,0 +1,19 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader::Backend {
10
11struct Bindings {
12 u32 unified{};
13 u32 uniform_buffer{};
14 u32 storage_buffer{};
15 u32 texture{};
16 u32 image{};
17};
18
19} // namespace Shader::Backend
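
The struct above is a set of running counters for host binding slots; the GLASM emit_context.cpp that follows advances them one descriptor at a time (textures, images, storage buffers). A minimal sketch of that pattern, assuming a stand-in Descriptor type with a count field (the real descriptor types live in shader_info.h and are not shown in this section):

#include <vector>

#include "shader_recompiler/backend/bindings.h"

struct Descriptor {
    u32 count; // a descriptor array reserves 'count' consecutive slots
};

// Record the first texture slot of each descriptor and advance the counter,
// mirroring how the backend EmitContext constructors consume Bindings.
void AssignTextureSlots(Shader::Backend::Bindings& bindings,
                        const std::vector<Descriptor>& descriptors,
                        std::vector<u32>& first_slots) {
    first_slots.reserve(descriptors.size());
    for (const Descriptor& desc : descriptors) {
        first_slots.push_back(bindings.texture); // slot of element 0
        bindings.texture += desc.count;          // skip past the whole array
    }
}
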
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/bindings.h"
8#include "shader_recompiler/backend/glasm/emit_context.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/profile.h"
11#include "shader_recompiler/runtime_info.h"
12
13namespace Shader::Backend::GLASM {
14namespace {
15std::string_view InterpDecorator(Interpolation interp) {
16 switch (interp) {
17 case Interpolation::Smooth:
18 return "";
19 case Interpolation::Flat:
20 return "FLAT ";
21 case Interpolation::NoPerspective:
22 return "NOPERSPECTIVE ";
23 }
24 throw InvalidArgument("Invalid interpolation {}", interp);
25}
26
27bool IsInputArray(Stage stage) {
28 return stage == Stage::Geometry || stage == Stage::TessellationControl ||
29 stage == Stage::TessellationEval;
30}
31} // Anonymous namespace
32
33EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
34 const RuntimeInfo& runtime_info_)
35 : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
36 // FIXME: Temporary partial implementation
37 u32 cbuf_index{};
38 for (const auto& desc : info.constant_buffer_descriptors) {
39 if (desc.count != 1) {
40 throw NotImplementedException("Constant buffer descriptor array");
41 }
42 Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
43 ++cbuf_index;
44 }
45 u32 ssbo_index{};
46 for (const auto& desc : info.storage_buffers_descriptors) {
47 if (desc.count != 1) {
48 throw NotImplementedException("Storage buffer descriptor array");
49 }
50 if (runtime_info.glasm_use_storage_buffers) {
51 Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
52 ++bindings.storage_buffer;
53 ++ssbo_index;
54 }
55 }
56 if (!runtime_info.glasm_use_storage_buffers) {
57 if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
58 Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
59 }
60 }
61 stage = program.stage;
62 switch (program.stage) {
63 case Stage::VertexA:
64 case Stage::VertexB:
65 stage_name = "vertex";
66 attrib_name = "vertex";
67 break;
68 case Stage::TessellationControl:
69 case Stage::TessellationEval:
70 stage_name = "primitive";
71 attrib_name = "primitive";
72 break;
73 case Stage::Geometry:
74 stage_name = "primitive";
75 attrib_name = "vertex";
76 break;
77 case Stage::Fragment:
78 stage_name = "fragment";
79 attrib_name = "fragment";
80 break;
81 case Stage::Compute:
82 stage_name = "invocation";
83 break;
84 }
85 const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
86 const VaryingState loads{info.loads.mask | info.passthrough.mask};
87 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
88 if (loads.Generic(index)) {
89 Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
90 InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
91 }
92 }
93 if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
94 Add("ATTRIB vertex_position=vertex.position;");
95 }
96 if (info.uses_invocation_id) {
97 Add("ATTRIB primitive_invocation=primitive.invocation;");
98 }
99 if (info.stores_tess_level_outer) {
100 Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
101 }
102 if (info.stores_tess_level_inner) {
103 Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
104 }
105 if (info.stores.ClipDistances()) {
106 Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
107 }
108 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
109 if (!info.uses_patches[index]) {
110 continue;
111 }
112 if (stage == Stage::TessellationControl) {
113 Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
114 "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
115 index, index, index, index, index, index);
116 } else {
117 Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
118 index, index);
119 }
120 }
121 if (stage == Stage::Fragment) {
122 Add("OUTPUT frag_color0=result.color;");
123 for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
124 Add("OUTPUT frag_color{}=result.color[{}];", index, index);
125 }
126 }
127 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
128 if (info.stores.Generic(index)) {
129 Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
130 }
131 }
132 image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
133 for (const auto& desc : info.image_buffer_descriptors) {
134 image_buffer_bindings.push_back(bindings.image);
135 bindings.image += desc.count;
136 }
137 image_bindings.reserve(info.image_descriptors.size());
138 for (const auto& desc : info.image_descriptors) {
139 image_bindings.push_back(bindings.image);
140 bindings.image += desc.count;
141 }
142 texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
143 for (const auto& desc : info.texture_buffer_descriptors) {
144 texture_buffer_bindings.push_back(bindings.texture);
145 bindings.texture += desc.count;
146 }
147 texture_bindings.reserve(info.texture_descriptors.size());
148 for (const auto& desc : info.texture_descriptors) {
149 texture_bindings.push_back(bindings.texture);
150 bindings.texture += desc.count;
151 }
152}
153
154} // namespace Shader::Backend::GLASM
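
To make the constructor above concrete: for a fragment shader that reads generic attribute 0 with smooth interpolation, uses constant buffer 0, and writes color target 0, the Add() calls would emit roughly the preamble shown in the comments below. The text is assembled by hand from the format strings above, not captured from a real run:

EmitContext ctx{program, bindings, profile, runtime_info};
// ctx.code now begins with declarations along the lines of:
//   CBUFFER c0[]={program.buffer[0]};
//   ATTRIB in_attr0[]={fragment.attrib[0..0]};
//   OUTPUT frag_color0=result.color;
// (Add() appends a newline after each declaration.)
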
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <utility>
9#include <vector>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/backend/glasm/reg_alloc.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader {
17struct Info;
18struct Profile;
19struct RuntimeInfo;
20} // namespace Shader
21
22namespace Shader::Backend {
23struct Bindings;
24}
25
26namespace Shader::IR {
27class Inst;
28struct Program;
29} // namespace Shader::IR
30
31namespace Shader::Backend::GLASM {
32
33class EmitContext {
34public:
35 explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
36 const RuntimeInfo& runtime_info_);
37
38 template <typename... Args>
39 void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
40 code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
41 std::forward<Args>(args)...);
42 // TODO: Remove this
43 code += '\n';
44 }
45
46 template <typename... Args>
47 void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
48 code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
49 std::forward<Args>(args)...);
50 // TODO: Remove this
51 code += '\n';
52 }
53
54 template <typename... Args>
55 void Add(const char* format_str, Args&&... args) {
56 code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
57 // TODO: Remove this
58 code += '\n';
59 }
60
61 std::string code;
62 RegAlloc reg_alloc{};
63 const Info& info;
64 const Profile& profile;
65 const RuntimeInfo& runtime_info;
66
67 std::vector<u32> texture_buffer_bindings;
68 std::vector<u32> image_buffer_bindings;
69 std::vector<u32> texture_bindings;
70 std::vector<u32> image_bindings;
71
72 Stage stage{};
73 std::string_view stage_name = "invalid";
74 std::string_view attrib_name = "invalid";
75
76 u32 num_safety_loop_vars{};
77 bool uses_y_direction{};
78};
79
80} // namespace Shader::Backend::GLASM
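
The first "{}" in an Add()/LongAdd() format string is always filled with the register that reg_alloc.Define() (or LongDefine()) allocates for the instruction. A representative emitter might look like the sketch below; the function name and format string are illustrative, not the exact contents of emit_glasm_integer.cpp:

void EmitExampleAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
    // Expands to e.g. "ADD.S R2.x,R0,R1;" followed by the newline Add() appends.
    ctx.Add("ADD.S {}.x,{},{};", inst, a, b);
}
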
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/div_ceil.h"
10#include "common/settings.h"
11#include "shader_recompiler/backend/bindings.h"
12#include "shader_recompiler/backend/glasm/emit_context.h"
13#include "shader_recompiler/backend/glasm/emit_glasm.h"
14#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
15#include "shader_recompiler/frontend/ir/ir_emitter.h"
16#include "shader_recompiler/frontend/ir/program.h"
17#include "shader_recompiler/profile.h"
18#include "shader_recompiler/runtime_info.h"
19
20namespace Shader::Backend::GLASM {
21namespace {
22template <class Func>
23struct FuncTraits {};
24
25template <class ReturnType_, class... Args>
26struct FuncTraits<ReturnType_ (*)(Args...)> {
27 using ReturnType = ReturnType_;
28
29 static constexpr size_t NUM_ARGS = sizeof...(Args);
30
31 template <size_t I>
32 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
33};
34
35template <typename T>
36struct Identity {
37 Identity(T data_) : data{data_} {}
38
39 T Extract() {
40 return data;
41 }
42
43 T data;
44};
45
46template <bool scalar>
47class RegWrapper {
48public:
49 RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
50 const Value value{reg_alloc.Peek(ir_value)};
51 if (value.type == Type::Register) {
52 inst = ir_value.InstRecursive();
53 reg = Register{value};
54 } else {
55 reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
56 }
57 switch (value.type) {
58 case Type::Register:
59 case Type::Void:
60 break;
61 case Type::U32:
62 ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
63 break;
64 case Type::U64:
65 ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
66 break;
67 }
68 }
69
70 auto Extract() {
71 if (inst) {
72 reg_alloc.Unref(*inst);
73 } else {
74 reg_alloc.FreeReg(reg);
75 }
76 return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
77 }
78
79private:
80 RegAlloc& reg_alloc;
81 IR::Inst* inst{};
82 Register reg{};
83};
84
85template <typename ArgType>
86class ValueWrapper {
87public:
88 ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
89 : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
90
91 ArgType Extract() {
92 if (!ir_value.IsImmediate()) {
93 reg_alloc.Unref(*ir_value.InstRecursive());
94 }
95 return value;
96 }
97
98private:
99 RegAlloc& reg_alloc;
100 const IR::Value& ir_value;
101 ArgType value;
102};
103
104template <typename ArgType>
105auto Arg(EmitContext& ctx, const IR::Value& arg) {
106 if constexpr (std::is_same_v<ArgType, Register>) {
107 return RegWrapper<false>{ctx, arg};
108 } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
109 return RegWrapper<true>{ctx, arg};
110 } else if constexpr (std::is_base_of_v<Value, ArgType>) {
111 return ValueWrapper<ArgType>{ctx, arg};
112 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
113 return Identity<const IR::Value&>{arg};
114 } else if constexpr (std::is_same_v<ArgType, u32>) {
115 return Identity{arg.U32()};
116 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
117 return Identity{arg.Attribute()};
118 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
119 return Identity{arg.Patch()};
120 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
121 return Identity{arg.Reg()};
122 }
123}
124
125template <auto func, bool is_first_arg_inst>
126struct InvokeCall {
127 template <typename... Args>
128 InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
129 if constexpr (is_first_arg_inst) {
130 func(ctx, *inst, args.Extract()...);
131 } else {
132 func(ctx, args.Extract()...);
133 }
134 }
135};
136
137template <auto func, bool is_first_arg_inst, size_t... I>
138void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
139 using Traits = FuncTraits<decltype(func)>;
140 if constexpr (is_first_arg_inst) {
141 InvokeCall<func, is_first_arg_inst>{
142 ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
143 } else {
144 InvokeCall<func, is_first_arg_inst>{
145 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
146 }
147}
148
149template <auto func>
150void Invoke(EmitContext& ctx, IR::Inst* inst) {
151 using Traits = FuncTraits<decltype(func)>;
152 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
153 if constexpr (Traits::NUM_ARGS == 1) {
154 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
155 } else {
156 using FirstArgType = typename Traits::template ArgType<1>;
157 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
158 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
159 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
160 }
161}
162
163void EmitInst(EmitContext& ctx, IR::Inst* inst) {
164 switch (inst->GetOpcode()) {
165#define OPCODE(name, result_type, ...) \
166 case IR::Opcode::name: \
167 return Invoke<&Emit##name>(ctx, inst);
168#include "shader_recompiler/frontend/ir/opcodes.inc"
169#undef OPCODE
170 }
171 throw LogicError("Invalid opcode {}", inst->GetOpcode());
172}
173
174bool IsReference(IR::Inst& inst) {
175 return inst.GetOpcode() == IR::Opcode::Reference;
176}
177
178void PrecolorInst(IR::Inst& phi) {
179 // Insert phi moves before references to avoid overwriting other phis
180 const size_t num_args{phi.NumArgs()};
181 for (size_t i = 0; i < num_args; ++i) {
182 IR::Block& phi_block{*phi.PhiBlock(i)};
183 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
184 IR::IREmitter ir{phi_block, it};
185 const IR::Value arg{phi.Arg(i)};
186 if (arg.IsImmediate()) {
187 ir.PhiMove(phi, arg);
188 } else {
189 ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
190 }
191 }
192 for (size_t i = 0; i < num_args; ++i) {
193 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
194 }
195}
196
197void Precolor(const IR::Program& program) {
198 for (IR::Block* const block : program.blocks) {
199 for (IR::Inst& phi : block->Instructions()) {
200 if (!IR::IsPhi(phi)) {
201 break;
202 }
203 PrecolorInst(phi);
204 }
205 }
206}
207
208void EmitCode(EmitContext& ctx, const IR::Program& program) {
209 const auto eval{
210 [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
211 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
212 switch (node.type) {
213 case IR::AbstractSyntaxNode::Type::Block:
214 for (IR::Inst& inst : node.data.block->Instructions()) {
215 EmitInst(ctx, &inst);
216 }
217 break;
218 case IR::AbstractSyntaxNode::Type::If:
219 ctx.Add("MOV.S.CC RC,{};"
220 "IF NE.x;",
221 eval(node.data.if_node.cond));
222 break;
223 case IR::AbstractSyntaxNode::Type::EndIf:
224 ctx.Add("ENDIF;");
225 break;
226 case IR::AbstractSyntaxNode::Type::Loop:
227 ctx.Add("REP;");
228 break;
229 case IR::AbstractSyntaxNode::Type::Repeat:
230 if (!Settings::values.disable_shader_loop_safety_checks) {
231 const u32 loop_index{ctx.num_safety_loop_vars++};
232 const u32 vector_index{loop_index / 4};
233 const char component{"xyzw"[loop_index % 4]};
234 ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
235 "BRK(LT.{});",
236 vector_index, component, vector_index, component, component);
237 }
238 if (node.data.repeat.cond.IsImmediate()) {
239 if (node.data.repeat.cond.U1()) {
240 ctx.Add("ENDREP;");
241 } else {
242 ctx.Add("BRK;"
243 "ENDREP;");
244 }
245 } else {
246 ctx.Add("MOV.S.CC RC,{};"
247 "BRK(EQ.x);"
248 "ENDREP;",
249 eval(node.data.repeat.cond));
250 }
251 break;
252 case IR::AbstractSyntaxNode::Type::Break:
253 if (node.data.break_node.cond.IsImmediate()) {
254 if (node.data.break_node.cond.U1()) {
255 ctx.Add("BRK;");
256 }
257 } else {
258 ctx.Add("MOV.S.CC RC,{};"
259 "BRK (NE.x);",
260 eval(node.data.break_node.cond));
261 }
262 break;
263 case IR::AbstractSyntaxNode::Type::Return:
264 case IR::AbstractSyntaxNode::Type::Unreachable:
265 ctx.Add("RET;");
266 break;
267 }
268 }
269 if (!ctx.reg_alloc.IsEmpty()) {
270 LOG_WARNING(Shader_GLASM, "Register leak after generating code");
271 }
272}
273
274void SetupOptions(const IR::Program& program, const Profile& profile,
275 const RuntimeInfo& runtime_info, std::string& header) {
276 const Info& info{program.info};
277 const Stage stage{program.stage};
278
279 // TODO: Track the shared atomic ops
280 header += "OPTION NV_internal;"
281 "OPTION NV_shader_storage_buffer;"
282 "OPTION NV_gpu_program_fp64;";
283 if (info.uses_int64_bit_atomics) {
284 header += "OPTION NV_shader_atomic_int64;";
285 }
286 if (info.uses_atomic_f32_add) {
287 header += "OPTION NV_shader_atomic_float;";
288 }
289 if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
290 header += "OPTION NV_shader_atomic_fp16_vector;";
291 }
292 if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
293 info.uses_fswzadd) {
294 header += "OPTION NV_shader_thread_group;";
295 }
296 if (info.uses_subgroup_shuffles) {
297 header += "OPTION NV_shader_thread_shuffle;";
298 }
299 if (info.uses_sparse_residency) {
300 header += "OPTION EXT_sparse_texture2;";
301 }
302 const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
303 info.stores[IR::Attribute::Layer]};
304 if ((stage != Stage::Geometry && stores_viewport_layer) ||
305 info.stores[IR::Attribute::ViewportMask]) {
306 if (profile.support_viewport_index_layer_non_geometry) {
307 header += "OPTION NV_viewport_array2;";
308 }
309 }
310 if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
311 header += "OPTION NV_geometry_shader_passthrough;";
312 }
313 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
314 header += "OPTION EXT_shader_image_load_formatted;";
315 }
316 if (profile.support_derivative_control) {
317 header += "OPTION ARB_derivative_control;";
318 }
319 if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
320 header += "OPTION NV_early_fragment_tests;";
321 }
322 if (stage == Stage::Fragment) {
323 header += "OPTION ARB_draw_buffers;";
324 }
325}
326
327std::string_view StageHeader(Stage stage) {
328 switch (stage) {
329 case Stage::VertexA:
330 case Stage::VertexB:
331 return "!!NVvp5.0\n";
332 case Stage::TessellationControl:
333 return "!!NVtcp5.0\n";
334 case Stage::TessellationEval:
335 return "!!NVtep5.0\n";
336 case Stage::Geometry:
337 return "!!NVgp5.0\n";
338 case Stage::Fragment:
339 return "!!NVfp5.0\n";
340 case Stage::Compute:
341 return "!!NVcp5.0\n";
342 }
343 throw InvalidArgument("Invalid stage {}", stage);
344}
345
346std::string_view InputPrimitive(InputTopology topology) {
347 switch (topology) {
348 case InputTopology::Points:
349 return "POINTS";
350 case InputTopology::Lines:
351 return "LINES";
352 case InputTopology::LinesAdjacency:
353 return "LINES_ADJACENCY";
354 case InputTopology::Triangles:
355 return "TRIANGLES";
356 case InputTopology::TrianglesAdjacency:
357 return "TRIANGLES_ADJACENCY";
358 }
359 throw InvalidArgument("Invalid input topology {}", topology);
360}
361
362std::string_view OutputPrimitive(OutputTopology topology) {
363 switch (topology) {
364 case OutputTopology::PointList:
365 return "POINTS";
366 case OutputTopology::LineStrip:
367 return "LINE_STRIP";
368 case OutputTopology::TriangleStrip:
369 return "TRIANGLE_STRIP";
370 }
371 throw InvalidArgument("Invalid output topology {}", topology);
372}
373
374std::string_view GetTessMode(TessPrimitive primitive) {
375 switch (primitive) {
376 case TessPrimitive::Triangles:
377 return "TRIANGLES";
378 case TessPrimitive::Quads:
379 return "QUADS";
380 case TessPrimitive::Isolines:
381 return "ISOLINES";
382 }
383 throw InvalidArgument("Invalid tessellation primitive {}", primitive);
384}
385
386std::string_view GetTessSpacing(TessSpacing spacing) {
387 switch (spacing) {
388 case TessSpacing::Equal:
389 return "EQUAL";
390 case TessSpacing::FractionalOdd:
391 return "FRACTIONAL_ODD";
392 case TessSpacing::FractionalEven:
393 return "FRACTIONAL_EVEN";
394 }
395 throw InvalidArgument("Invalid tessellation spacing {}", spacing);
396}
397} // Anonymous namespace
398
399std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
400 Bindings& bindings) {
401 EmitContext ctx{program, bindings, profile, runtime_info};
402 Precolor(program);
403 EmitCode(ctx, program);
404 std::string header{StageHeader(program.stage)};
405 SetupOptions(program, profile, runtime_info, header);
406 switch (program.stage) {
407 case Stage::TessellationControl:
408 header += fmt::format("VERTICES_OUT {};", program.invocations);
409 break;
410 case Stage::TessellationEval:
411 header += fmt::format("TESS_MODE {};"
412 "TESS_SPACING {};"
413 "TESS_VERTEX_ORDER {};",
414 GetTessMode(runtime_info.tess_primitive),
415 GetTessSpacing(runtime_info.tess_spacing),
416 runtime_info.tess_clockwise ? "CW" : "CCW");
417 break;
418 case Stage::Geometry:
419 header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
420 if (program.is_geometry_passthrough) {
421 if (profile.support_geometry_shader_passthrough) {
422 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
423 if (program.info.passthrough.Generic(index)) {
424 header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
425 }
426 }
427 if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
428 header += "PASSTHROUGH result.position;";
429 }
430 } else {
431 LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
432 }
433 } else {
434 header +=
435 fmt::format("VERTICES_OUT {};"
436 "PRIMITIVE_OUT {};",
437 program.output_vertices, OutputPrimitive(program.output_topology));
438 }
439 break;
440 case Stage::Compute:
441 header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
442 program.workgroup_size[1], program.workgroup_size[2]);
443 break;
444 default:
445 break;
446 }
447 if (program.shared_memory_size > 0) {
448 header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
449 header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
450 }
451 header += "TEMP ";
452 for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
453 header += fmt::format("R{},", index);
454 }
455 if (program.local_memory_size > 0) {
456 header += fmt::format("lmem[{}],", program.local_memory_size);
457 }
458 if (program.info.uses_fswzadd) {
459 header += "FSWZA[4],FSWZB[4],";
460 }
461 const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
462 for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
463 header += fmt::format("loop{},", index);
464 }
465 header += "RC;"
466 "LONG TEMP ";
467 for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
468 header += fmt::format("D{},", index);
469 }
470 header += "DC;";
471 if (program.info.uses_fswzadd) {
472 header += "MOV.F FSWZA[0],-1;"
473 "MOV.F FSWZA[1],1;"
474 "MOV.F FSWZA[2],-1;"
475 "MOV.F FSWZA[3],0;"
476 "MOV.F FSWZB[0],-1;"
477 "MOV.F FSWZB[1],-1;"
478 "MOV.F FSWZB[2],1;"
479 "MOV.F FSWZB[3],-1;";
480 }
481 for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
482 header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
483 }
484 if (ctx.uses_y_direction) {
485 header += "PARAM y_direction[1]={state.material.front.ambient};";
486 }
487 ctx.code.insert(0, header);
488 ctx.code += "END";
489 return ctx.code;
490}
491
492} // namespace Shader::Backend::GLASM
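
Putting the pieces together, the string EmitGLASM returns for a small compute shader has roughly the shape sketched below. The comment is illustrative, assembled from StageHeader, SetupOptions, and the TEMP/LONG TEMP bookkeeping above rather than taken from a real run, and line breaks are added for readability (the emitted header is a single line after the "!!NVcp5.0" tag):

const std::string text = EmitGLASM(profile, runtime_info, program, bindings);
// text is roughly:
//   !!NVcp5.0
//   OPTION NV_internal;OPTION NV_shader_storage_buffer;OPTION NV_gpu_program_fp64;
//   GROUP_SIZE 8 8 1;
//   TEMP R0,R1,RC;
//   LONG TEMP DC;
//   ...per-instruction GLASM appended by EmitCode()...
//   END
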
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include "shader_recompiler/backend/bindings.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/profile.h"
12#include "shader_recompiler/runtime_info.h"
13
14namespace Shader::Backend::GLASM {
15
16[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
17 IR::Program& program, Bindings& bindings);
18
19[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
20 IR::Program& program) {
21 Bindings binding;
22 return EmitGLASM(profile, runtime_info, program, binding);
23}
24
25} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLASM {
10
11static void Alias(IR::Inst& inst, const IR::Value& value) {
12 if (value.IsImmediate()) {
13 return;
14 }
15 IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
16 value_inst.DestructiveAddUsage(inst.UseCount());
17 value_inst.DestructiveRemoveUsage();
18 inst.SetDefinition(value_inst.Definition<Id>());
19}
20
21void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
22 Alias(inst, value);
23}
24
25void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
26 // Fake one usage to get a real register out of the condition
27 inst.DestructiveAddUsage(1);
28 const Register ret{ctx.reg_alloc.Define(inst)};
29 const ScalarS32 input{ctx.reg_alloc.Consume(value)};
30 if (ret != input) {
31 ctx.Add("MOV.S {},{};", ret, input);
32 }
33}
34
35void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
36 Alias(inst, value);
37}
38
39void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
40 Alias(inst, value);
41}
42
43void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
44 Alias(inst, value);
45}
46
47void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
48 Alias(inst, value);
49}
50
51void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
52 Alias(inst, value);
53}
54
55void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
56 Alias(inst, value);
57}
58
59void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
60 ctx.LongAdd("PK64.U {}.x,{};", inst, value);
61}
62
63void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
64 ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
65}
66
67void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
68 throw NotImplementedException("GLASM instruction");
69}
70
71void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
72 throw NotImplementedException("GLASM instruction");
73}
74
75void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
76 ctx.Add("PK2H {}.x,{};", inst, value);
77}
78
79void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
80 ctx.Add("UP2H {}.xy,{}.x;", inst, value);
81}
82
83void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
84 ctx.LongAdd("PK64 {}.x,{};", inst, value);
85}
86
87void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
88 ctx.Add("UP64 {}.xy,{}.x;", inst, value);
89}
90
91} // namespace Shader::Backend::GLASM
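The PK64.U/UP64.U pair above packs two 32-bit words into one 64-bit register and splits it back apart. A minimal CPU-side sketch of the same operation, under the assumption that the x component carries the low word:

// Illustrative only: the CPU-side equivalent of what PK64.U / UP64.U compute in the
// emitters above -- packing two 32-bit words into one 64-bit value and back.
#include <cstdint>
#include <iostream>
#include <utility>

std::uint64_t PackUint2x32(std::uint32_t lo, std::uint32_t hi) {
    return static_cast<std::uint64_t>(lo) | (static_cast<std::uint64_t>(hi) << 32);
}

std::pair<std::uint32_t, std::uint32_t> UnpackUint2x32(std::uint64_t value) {
    return {static_cast<std::uint32_t>(value), static_cast<std::uint32_t>(value >> 32)};
}

int main() {
    const std::uint64_t packed = PackUint2x32(0xdeadbeef, 0xcafebabe);
    const auto [lo, hi] = UnpackUint2x32(packed);
    // Prints: cafebabedeadbeef deadbeef cafebabe
    std::cout << std::hex << packed << ' ' << lo << ' ' << hi << '\n';
}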
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLASM {
10namespace {
11template <auto read_imm, char type, typename... Values>
12void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
13 const Register ret{ctx.reg_alloc.Define(inst)};
14 if (std::ranges::any_of(std::array{elements...},
15 [](const IR::Value& value) { return value.IsImmediate(); })) {
16 using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
17 const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
18 ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
19 fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
20 }
21 size_t index{};
22 for (const IR::Value& element : {elements...}) {
23 if (!element.IsImmediate()) {
24 const ScalarU32 value{ctx.reg_alloc.Consume(element)};
25 ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
26 }
27 ++index;
28 }
29}
30
31void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
32 const Register ret{ctx.reg_alloc.Define(inst)};
33 if (ret == composite && index == 0) {
34 // No need to do anything here, the source and destination are the same register
35 return;
36 }
37 ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
38}
39
40template <typename ObjectType>
41void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
42 u32 index, char type) {
43 const Register ret{ctx.reg_alloc.Define(inst)};
44 const char swizzle{"xyzw"[index]};
45 if (ret != composite && ret == object) {
46 // The object is aliased with the return value, so we have to insert it through a temporary
47 ctx.Add("MOV.{} RC,{};"
48 "MOV.{} RC.{},{};"
49 "MOV.{} {},RC;",
50 type, composite, type, swizzle, object, type, ret);
51 } else if (ret != composite) {
52 // The input composite is not aliased with the return value, so we have to copy it
53 // beforehand. The insert object is not aliased with the return value either, so we
54 // don't have to worry about overwriting it
55 ctx.Add("MOV.{} {},{};"
56 "MOV.{} {}.{},{};",
57 type, ret, composite, type, ret, swizzle, object);
58 } else {
59 // The return value is aliased with the composite, so we can just insert the object;
60 // it doesn't matter whether the object is aliased as well
61 ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
62 }
63}
64} // Anonymous namespace
65
66void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
67 const IR::Value& e2) {
68 CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
69}
70
71void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
72 const IR::Value& e2, const IR::Value& e3) {
73 CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
74}
75
76void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
77 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
78 CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
79}
80
81void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
82 CompositeExtract(ctx, inst, composite, index, 'U');
83}
84
85void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
86 CompositeExtract(ctx, inst, composite, index, 'U');
87}
88
89void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
90 CompositeExtract(ctx, inst, composite, index, 'U');
91}
92
93void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
94 [[maybe_unused]] Register composite,
95 [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
96 throw NotImplementedException("GLASM instruction");
97}
98
99void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
100 [[maybe_unused]] Register composite,
101 [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
102 throw NotImplementedException("GLASM instruction");
103}
104
105void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
106 [[maybe_unused]] Register composite,
107 [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
108 throw NotImplementedException("GLASM instruction");
109}
110
111void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
112 [[maybe_unused]] Register e2) {
113 throw NotImplementedException("GLASM instruction");
114}
115
116void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
117 [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
118 throw NotImplementedException("GLASM instruction");
119}
120
121void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
122 [[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
123 [[maybe_unused]] Register e4) {
124 throw NotImplementedException("GLASM instruction");
125}
126
127void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
128 [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
129 throw NotImplementedException("GLASM instruction");
130}
131
132void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
133 [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
134 throw NotImplementedException("GLASM instruction");
135}
136
137void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
138 [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
139 throw NotImplementedException("GLASM instruction");
140}
141
142void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
143 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
144 [[maybe_unused]] u32 index) {
145 throw NotImplementedException("GLASM instruction");
146}
147
148void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
149 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
150 [[maybe_unused]] u32 index) {
151 throw NotImplementedException("GLASM instruction");
152}
153
154void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
155 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
156 [[maybe_unused]] u32 index) {
157 throw NotImplementedException("GLASM instruction");
158}
159
160void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
161 const IR::Value& e2) {
162 CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
163}
164
165void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
166 const IR::Value& e2, const IR::Value& e3) {
167 CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
168}
169
170void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
171 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
172 CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
173}
174
175void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
176 CompositeExtract(ctx, inst, composite, index, 'F');
177}
178
179void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
180 CompositeExtract(ctx, inst, composite, index, 'F');
181}
182
183void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
184 CompositeExtract(ctx, inst, composite, index, 'F');
185}
186
187void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
188 ScalarF32 object, u32 index) {
189 CompositeInsert(ctx, inst, composite, object, index, 'F');
190}
191
192void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
193 ScalarF32 object, u32 index) {
194 CompositeInsert(ctx, inst, composite, object, index, 'F');
195}
196
197void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
198 ScalarF32 object, u32 index) {
199 CompositeInsert(ctx, inst, composite, object, index, 'F');
200}
201
202void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
203 throw NotImplementedException("GLASM instruction");
204}
205
206void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
207 throw NotImplementedException("GLASM instruction");
208}
209
210void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
211 throw NotImplementedException("GLASM instruction");
212}
213
214void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
215 throw NotImplementedException("GLASM instruction");
216}
217
218void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
219 throw NotImplementedException("GLASM instruction");
220}
221
222void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
223 throw NotImplementedException("GLASM instruction");
224}
225
226void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
227 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
228 [[maybe_unused]] u32 index) {
229 throw NotImplementedException("GLASM instruction");
230}
231
232void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
233 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
234 [[maybe_unused]] u32 index) {
235 throw NotImplementedException("GLASM instruction");
236}
237
238void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
239 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
240 [[maybe_unused]] u32 index) {
241 throw NotImplementedException("GLASM instruction");
242}
243
244} // namespace Shader::Backend::GLASM
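The three aliasing cases in CompositeInsert are easiest to see from the GLASM text each branch produces. The sketch below is illustrative only: it rebuilds the same branch logic outside the emitter with made-up register names and assumes fmt is available.

// Illustrative only: standalone reproduction of CompositeInsert's aliasing branches,
// returning the GLASM text each case would emit for an F32 insert.
#include <iostream>
#include <string>

#include <fmt/format.h>

std::string InsertF32(const std::string& ret, const std::string& composite,
                      const std::string& object, char swizzle) {
    if (ret != composite && ret == object) {
        // The object aliases the result: stage through the RC scratch register.
        return fmt::format("MOV.F RC,{};MOV.F RC.{},{};MOV.F {},RC;", composite, swizzle, object,
                           ret);
    } else if (ret != composite) {
        // The result is a fresh register: copy the composite, then overwrite one component.
        return fmt::format("MOV.F {},{};MOV.F {}.{},{};", ret, composite, ret, swizzle, object);
    } else {
        // The result aliases the composite: overwrite the component in place.
        return fmt::format("MOV.F {}.{},{};", ret, swizzle, object);
    }
}

int main() {
    std::cout << InsertF32("R0", "R1", "R0", 'y') << '\n'; // object aliased with result
    std::cout << InsertF32("R0", "R1", "R2", 'y') << '\n'; // no aliasing
    std::cout << InsertF32("R0", "R0", "R2", 'y') << '\n'; // result aliased with composite
}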
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
new file mode 100644
index 000000000..02c9dc6d7
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -0,0 +1,346 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11#include "shader_recompiler/shader_info.h"
12
13namespace Shader::Backend::GLASM {
14namespace {
15void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
16 std::string_view size) {
17 if (!binding.IsImmediate()) {
18 throw NotImplementedException("Indirect constant buffer loading");
19 }
20 const Register ret{ctx.reg_alloc.Define(inst)};
21 if (offset.type == Type::U32) {
22 // Avoid reading arrays out of bounds, matching hardware's behavior
23 if (offset.imm_u32 >= 0x10'000) {
24 ctx.Add("MOV.S {},0;", ret);
25 return;
26 }
27 }
28 ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
29}
30
31bool IsInputArray(Stage stage) {
32 return stage == Stage::Geometry || stage == Stage::TessellationControl ||
33 stage == Stage::TessellationEval;
34}
35
36std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
37 return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
38}
39
40u32 TexCoordIndex(IR::Attribute attr) {
41 return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
42}
43} // Anonymous namespace
44
45void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
46 GetCbuf(ctx, inst, binding, offset, "U8");
47}
48
49void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
50 GetCbuf(ctx, inst, binding, offset, "S8");
51}
52
53void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
54 GetCbuf(ctx, inst, binding, offset, "U16");
55}
56
57void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
58 GetCbuf(ctx, inst, binding, offset, "S16");
59}
60
61void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
62 GetCbuf(ctx, inst, binding, offset, "U32");
63}
64
65void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
66 GetCbuf(ctx, inst, binding, offset, "F32");
67}
68
69void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
70 ScalarU32 offset) {
71 GetCbuf(ctx, inst, binding, offset, "U32X2");
72}
73
74void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
75 const u32 element{static_cast<u32>(attr) % 4};
76 const char swizzle{"xyzw"[element]};
77 if (IR::IsGeneric(attr)) {
78 const u32 index{IR::GenericAttributeIndex(attr)};
79 ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
80 return;
81 }
82 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
83 const u32 index{TexCoordIndex(attr)};
84 ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
85 return;
86 }
87 switch (attr) {
88 case IR::Attribute::PrimitiveId:
89 ctx.Add("MOV.S {}.x,primitive.id;", inst);
90 break;
91 case IR::Attribute::PositionX:
92 case IR::Attribute::PositionY:
93 case IR::Attribute::PositionZ:
94 case IR::Attribute::PositionW:
95 if (IsInputArray(ctx.stage)) {
96 ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
97 } else {
98 ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
99 }
100 break;
101 case IR::Attribute::ColorFrontDiffuseR:
102 case IR::Attribute::ColorFrontDiffuseG:
103 case IR::Attribute::ColorFrontDiffuseB:
104 case IR::Attribute::ColorFrontDiffuseA:
105 ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
106 break;
107 case IR::Attribute::PointSpriteS:
108 case IR::Attribute::PointSpriteT:
109 ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
110 break;
111 case IR::Attribute::TessellationEvaluationPointU:
112 case IR::Attribute::TessellationEvaluationPointV:
113 ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
114 break;
115 case IR::Attribute::InstanceId:
116 ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
117 break;
118 case IR::Attribute::VertexId:
119 ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
120 break;
121 case IR::Attribute::FrontFace:
122 ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
123 break;
124 default:
125 throw NotImplementedException("Get attribute {}", attr);
126 }
127}
128
129void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
130 [[maybe_unused]] ScalarU32 vertex) {
131 const u32 element{static_cast<u32>(attr) % 4};
132 const char swizzle{"xyzw"[element]};
133 if (IR::IsGeneric(attr)) {
134 const u32 index{IR::GenericAttributeIndex(attr)};
135 ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
136 return;
137 }
138 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) {
139 const u32 index{TexCoordIndex(attr)};
140 ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value);
141 return;
142 }
143 switch (attr) {
144 case IR::Attribute::Layer:
145 if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
146 ctx.Add("MOV.F result.layer.x,{};", value);
147 } else {
148 LOG_WARNING(Shader_GLASM,
149 "Layer stored outside of geometry shader not supported by device");
150 }
151 break;
152 case IR::Attribute::ViewportIndex:
153 if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
154 ctx.Add("MOV.F result.viewport.x,{};", value);
155 } else {
156 LOG_WARNING(Shader_GLASM,
157 "Viewport stored outside of geometry shader not supported by device");
158 }
159 break;
160 case IR::Attribute::ViewportMask:
161 // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage.
162 if (ctx.profile.support_viewport_index_layer_non_geometry) {
163 ctx.Add("MOV.F result.viewportmask[0].x,{};", value);
164 } else {
165 LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask");
166 }
167 break;
168 case IR::Attribute::PointSize:
169 ctx.Add("MOV.F result.pointsize.x,{};", value);
170 break;
171 case IR::Attribute::PositionX:
172 case IR::Attribute::PositionY:
173 case IR::Attribute::PositionZ:
174 case IR::Attribute::PositionW:
175 ctx.Add("MOV.F result.position.{},{};", swizzle, value);
176 break;
177 case IR::Attribute::ColorFrontDiffuseR:
178 case IR::Attribute::ColorFrontDiffuseG:
179 case IR::Attribute::ColorFrontDiffuseB:
180 case IR::Attribute::ColorFrontDiffuseA:
181 ctx.Add("MOV.F result.color.{},{};", swizzle, value);
182 break;
183 case IR::Attribute::ColorFrontSpecularR:
184 case IR::Attribute::ColorFrontSpecularG:
185 case IR::Attribute::ColorFrontSpecularB:
186 case IR::Attribute::ColorFrontSpecularA:
187 ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value);
188 break;
189 case IR::Attribute::ColorBackDiffuseR:
190 case IR::Attribute::ColorBackDiffuseG:
191 case IR::Attribute::ColorBackDiffuseB:
192 case IR::Attribute::ColorBackDiffuseA:
193 ctx.Add("MOV.F result.color.back.{},{};", swizzle, value);
194 break;
195 case IR::Attribute::ColorBackSpecularR:
196 case IR::Attribute::ColorBackSpecularG:
197 case IR::Attribute::ColorBackSpecularB:
198 case IR::Attribute::ColorBackSpecularA:
199 ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value);
200 break;
201 case IR::Attribute::FogCoordinate:
202 ctx.Add("MOV.F result.fogcoord.x,{};", value);
203 break;
204 case IR::Attribute::ClipDistance0:
205 case IR::Attribute::ClipDistance1:
206 case IR::Attribute::ClipDistance2:
207 case IR::Attribute::ClipDistance3:
208 case IR::Attribute::ClipDistance4:
209 case IR::Attribute::ClipDistance5:
210 case IR::Attribute::ClipDistance6:
211 case IR::Attribute::ClipDistance7: {
212 const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
213 ctx.Add("MOV.F result.clip[{}].x,{};", index, value);
214 break;
215 }
216 default:
217 throw NotImplementedException("Set attribute {}", attr);
218 }
219}
220
221void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
222 // RC.x = base_index
223 // RC.y = masked_index
224 // RC.z = compare_index
225 ctx.Add("SHR.S RC.x,{},2;"
226 "AND.S RC.y,RC.x,3;"
227 "SHR.S RC.z,{},4;",
228 offset, offset);
229
230 const Register ret{ctx.reg_alloc.Define(inst)};
231 u32 num_endifs{};
232 const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
233 ++num_endifs;
234 ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
235 "IF NE.w;"
236 // X
237 "SEQ.S.CC RC.w,RC.y,0;"
238 "IF NE.w;"
239 "MOV {}.x,{};"
240 "ELSE;"
241 // Y
242 "SEQ.S.CC RC.w,RC.y,1;"
243 "IF NE.w;"
244 "MOV {}.x,{};"
245 "ELSE;"
246 // Z
247 "SEQ.S.CC RC.w,RC.y,2;"
248 "IF NE.w;"
249 "MOV {}.x,{};"
250 "ELSE;"
251 // W
252 "MOV {}.x,{};"
253 "ENDIF;"
254 "ENDIF;"
255 "ENDIF;"
256 "ELSE;",
257 compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
258 }};
259 const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
260 const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
261 fmt::format("{}.z", value), fmt::format("{}.w", value)};
262 read(compare_index, values);
263 }};
264 if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) {
265 const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
266 if (IsInputArray(ctx.stage)) {
267 read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
268 } else {
269 read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
270 }
271 }
272 for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
273 if (!ctx.info.loads.Generic(index)) {
274 continue;
275 }
276 read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
277 }
278 for (u32 i = 0; i < num_endifs; ++i) {
279 ctx.Add("ENDIF;");
280 }
281}
282
283void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
284 [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) {
285 throw NotImplementedException("GLASM instruction");
286}
287
288void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
289 if (!IR::IsGeneric(patch)) {
290 throw NotImplementedException("Non-generic patch load");
291 }
292 const u32 index{IR::GenericPatchIndex(patch)};
293 const u32 element{IR::GenericPatchElement(patch)};
294 const char swizzle{"xyzw"[element]};
295 const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""};
296 ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle);
297}
298
299void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) {
300 if (IR::IsGeneric(patch)) {
301 const u32 index{IR::GenericPatchIndex(patch)};
302 const u32 element{IR::GenericPatchElement(patch)};
303 ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value);
304 return;
305 }
306 switch (patch) {
307 case IR::Patch::TessellationLodLeft:
308 case IR::Patch::TessellationLodRight:
309 case IR::Patch::TessellationLodTop:
310 case IR::Patch::TessellationLodBottom: {
311 const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
312 ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value);
313 break;
314 }
315 case IR::Patch::TessellationLodInteriorU:
316 ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value);
317 break;
318 case IR::Patch::TessellationLodInteriorV:
319 ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value);
320 break;
321 default:
322 throw NotImplementedException("Patch {}", patch);
323 }
324}
325
326void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) {
327 ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value);
328}
329
330void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) {
331 ctx.Add("MOV.S result.samplemask.x,{};", value);
332}
333
334void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) {
335 ctx.Add("MOV.F result.depth.z,{};", value);
336}
337
338void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) {
339 ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset);
340}
341
342void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) {
343 ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value);
344}
345
346} // namespace Shader::Backend::GLASM
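The shift-and-mask sequence in EmitGetAttributeIndexed decodes a dynamic byte offset into an attribute slot and a component within its vec4. A standalone sketch of that arithmetic, with a hypothetical offset value:

// Illustrative only: the index arithmetic EmitGetAttributeIndexed performs on the GPU,
// reproduced on the CPU. A generic attribute is 16 bytes (one vec4), so the byte offset
// splits into an attribute index and a component selector.
#include <cstdint>
#include <iostream>

int main() {
    const std::uint32_t byte_offset = 0x94;             // hypothetical dynamic offset
    const std::uint32_t word_index = byte_offset >> 2;  // RC.x: 32-bit word index
    const std::uint32_t component = word_index & 3;     // RC.y: x/y/z/w within the vec4
    const std::uint32_t attribute = byte_offset >> 4;   // RC.z: which vec4 attribute
    // Prints: 9 y
    std::cout << attribute << ' ' << "xyzw"[component] << '\n';
}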
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
new file mode 100644
index 000000000..ccdf1cbc8
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
@@ -0,0 +1,231 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLASM {
13namespace {
14std::string_view FpRounding(IR::FpRounding fp_rounding) {
15 switch (fp_rounding) {
16 case IR::FpRounding::DontCare:
17 return "";
18 case IR::FpRounding::RN:
19 return ".ROUND";
20 case IR::FpRounding::RZ:
21 return ".TRUNC";
22 case IR::FpRounding::RM:
23 return ".FLR";
24 case IR::FpRounding::RP:
25 return ".CEIL";
26 }
27 throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
28}
29
30template <typename InputType>
31void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
32 std::string_view src, bool is_long_result) {
33 const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
34 const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
35 ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
36}
37} // Anonymous namespace
38
39void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
40 Convert(ctx, inst, value, "S16", "F16", false);
41}
42
43void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
44 Convert(ctx, inst, value, "S16", "F32", false);
45}
46
47void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
48 Convert(ctx, inst, value, "S16", "F64", false);
49}
50
51void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
52 Convert(ctx, inst, value, "S32", "F16", false);
53}
54
55void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
56 Convert(ctx, inst, value, "S32", "F32", false);
57}
58
59void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
60 Convert(ctx, inst, value, "S32", "F64", false);
61}
62
63void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
64 Convert(ctx, inst, value, "S64", "F16", true);
65}
66
67void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
68 Convert(ctx, inst, value, "S64", "F32", true);
69}
70
71void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
72 Convert(ctx, inst, value, "S64", "F64", true);
73}
74
75void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
76 Convert(ctx, inst, value, "U16", "F16", false);
77}
78
79void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
80 Convert(ctx, inst, value, "U16", "F32", false);
81}
82
83void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
84 Convert(ctx, inst, value, "U16", "F64", false);
85}
86
87void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
88 Convert(ctx, inst, value, "U32", "F16", false);
89}
90
91void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
92 Convert(ctx, inst, value, "U32", "F32", false);
93}
94
95void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
96 Convert(ctx, inst, value, "U32", "F64", false);
97}
98
99void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
100 Convert(ctx, inst, value, "U64", "F16", true);
101}
102
103void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
104 Convert(ctx, inst, value, "U64", "F32", true);
105}
106
107void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
108 Convert(ctx, inst, value, "U64", "F64", true);
109}
110
111void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
112 Convert(ctx, inst, value, "U64", "U32", true);
113}
114
115void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
116 Convert(ctx, inst, value, "U32", "U64", false);
117}
118
119void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
120 Convert(ctx, inst, value, "F16", "F32", false);
121}
122
123void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
124 Convert(ctx, inst, value, "F32", "F16", false);
125}
126
127void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
128 Convert(ctx, inst, value, "F32", "F64", false);
129}
130
131void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
132 Convert(ctx, inst, value, "F64", "F32", true);
133}
134
135void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) {
136 Convert(ctx, inst, value, "F16", "S8", false);
137}
138
139void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) {
140 Convert(ctx, inst, value, "F16", "S16", false);
141}
142
143void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
144 Convert(ctx, inst, value, "F16", "S32", false);
145}
146
147void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) {
148 Convert(ctx, inst, value, "F16", "S64", false);
149}
150
151void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) {
152 Convert(ctx, inst, value, "F16", "U8", false);
153}
154
155void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) {
156 Convert(ctx, inst, value, "F16", "U16", false);
157}
158
159void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
160 Convert(ctx, inst, value, "F16", "U32", false);
161}
162
163void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) {
164 Convert(ctx, inst, value, "F16", "U64", false);
165}
166
167void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) {
168 Convert(ctx, inst, value, "F32", "S8", false);
169}
170
171void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) {
172 Convert(ctx, inst, value, "F32", "S16", false);
173}
174
175void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
176 Convert(ctx, inst, value, "F32", "S32", false);
177}
178
179void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) {
180 Convert(ctx, inst, value, "F32", "S64", false);
181}
182
183void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) {
184 Convert(ctx, inst, value, "F32", "U8", false);
185}
186
187void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) {
188 Convert(ctx, inst, value, "F32", "U16", false);
189}
190
191void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
192 Convert(ctx, inst, value, "F32", "U32", false);
193}
194
195void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
196 Convert(ctx, inst, value, "F32", "U64", false);
197}
198
199void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) {
200 Convert(ctx, inst, value, "F64", "S8", true);
201}
202
203void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) {
204 Convert(ctx, inst, value, "F64", "S16", true);
205}
206
207void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
208 Convert(ctx, inst, value, "F64", "S32", true);
209}
210
211void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) {
212 Convert(ctx, inst, value, "F64", "S64", true);
213}
214
215void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) {
216 Convert(ctx, inst, value, "F64", "U8", true);
217}
218
219void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) {
220 Convert(ctx, inst, value, "F64", "U16", true);
221}
222
223void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
224 Convert(ctx, inst, value, "F64", "U32", true);
225}
226
227void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) {
228 Convert(ctx, inst, value, "F64", "U64", true);
229}
230
231} // namespace Shader::Backend::GLASM
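The Convert helper above joins a destination type, a source type, and the rounding modifier from FpRounding into a single CVT instruction. The sketch below is illustrative only, with made-up register names, and assumes fmt is available:

// Illustrative only: the CVT string the Convert helper assembles, shown standalone.
#include <iostream>
#include <string_view>

#include <fmt/format.h>

enum class FpRounding { DontCare, RN, RZ, RM, RP };

std::string_view Modifier(FpRounding rounding) {
    switch (rounding) {
    case FpRounding::DontCare:
        return "";
    case FpRounding::RN:
        return ".ROUND"; // round to nearest even
    case FpRounding::RZ:
        return ".TRUNC"; // round toward zero
    case FpRounding::RM:
        return ".FLR"; // round toward negative infinity
    case FpRounding::RP:
        return ".CEIL"; // round toward positive infinity
    }
    return "";
}

int main() {
    // e.g. converting an F32 in R1.x to an S32 in R0.x with round-toward-zero:
    std::cout << fmt::format("CVT.{}.{}{} {}.x,{};", "S32", "F32", Modifier(FpRounding::RZ), "R0",
                             "R1.x")
              << '\n';
    // Prints: CVT.S32.F32.TRUNC R0.x,R1.x;
}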
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
new file mode 100644
index 000000000..4ed58619d
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
@@ -0,0 +1,414 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLASM {
13namespace {
14template <typename InputType>
15void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op,
16 std::string_view type, bool ordered, bool inequality = false) {
17 const Register ret{ctx.reg_alloc.Define(inst)};
18 ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs);
19 if (ordered && inequality) {
20 ctx.Add("SEQ.{} RC.y,{},{};"
21 "SEQ.{} RC.z,{},{};"
22 "AND.U RC.x,RC.x,RC.y;"
23 "AND.U RC.x,RC.x,RC.z;"
24 "SNE.S {}.x,RC.x,0;",
25 type, lhs, lhs, type, rhs, rhs, ret);
26 } else if (ordered) {
27 ctx.Add("SNE.S {}.x,RC.x,0;", ret);
28 } else {
29 ctx.Add("SNE.{} RC.y,{},{};"
30 "SNE.{} RC.z,{},{};"
31 "OR.U RC.x,RC.x,RC.y;"
32 "OR.U RC.x,RC.x,RC.z;"
33 "SNE.S {}.x,RC.x,0;",
34 type, lhs, lhs, type, rhs, rhs, ret);
35 }
36}
37
38template <typename InputType>
39void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value,
40 InputType max_value, std::string_view type) {
41 // Call MAX first so that a NaN input is clamped to min_value rather than propagated
42 ctx.Add("MAX.{} RC.x,{},{};"
43 "MIN.{} {}.x,RC.x,{};",
44 type, min_value, value, type, ret, max_value);
45}
46
47std::string_view Precise(IR::Inst& inst) {
48 const bool precise{inst.Flags<IR::FpControl>().no_contraction};
49 return precise ? ".PREC" : "";
50}
51} // Anonymous namespace
52
53void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
54 [[maybe_unused]] Register value) {
55 throw NotImplementedException("GLASM instruction");
56}
57
58void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
59 ctx.Add("MOV.F {}.x,|{}|;", inst, value);
60}
61
62void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
63 ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value);
64}
65
66void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
67 [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
68 throw NotImplementedException("GLASM instruction");
69}
70
71void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
72 ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
73}
74
75void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
76 ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
77}
78
79void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
80 [[maybe_unused]] Register a, [[maybe_unused]] Register b,
81 [[maybe_unused]] Register c) {
82 throw NotImplementedException("GLASM instruction");
83}
84
85void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) {
86 ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c);
87}
88
89void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) {
90 ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c);
91}
92
93void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
94 ctx.Add("MAX.F {}.x,{},{};", inst, a, b);
95}
96
97void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
98 ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b);
99}
100
101void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
102 ctx.Add("MIN.F {}.x,{},{};", inst, a, b);
103}
104
105void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
106 ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b);
107}
108
109void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
110 [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
111 throw NotImplementedException("GLASM instruction");
112}
113
114void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
115 ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
116}
117
118void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
119 ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
120}
121
122void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
123 throw NotImplementedException("GLASM instruction");
124}
125
126void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) {
127 ctx.Add("MOV.F {}.x,-{};", inst, value);
128}
129
130void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
131 ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value);
132}
133
134void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
135 ctx.Add("SIN {}.x,{};", inst, value);
136}
137
138void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
139 ctx.Add("COS {}.x,{};", inst, value);
140}
141
142void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
143 ctx.Add("EX2 {}.x,{};", inst, value);
144}
145
146void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
147 ctx.Add("LG2 {}.x,{};", inst, value);
148}
149
150void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
151 ctx.Add("RCP {}.x,{};", inst, value);
152}
153
154void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
155 throw NotImplementedException("GLASM instruction");
156}
157
158void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
159 ctx.Add("RSQ {}.x,{};", inst, value);
160}
161
162void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
163 throw NotImplementedException("GLASM instruction");
164}
165
166void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
167 const Register ret{ctx.reg_alloc.Define(inst)};
168 ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret);
169}
170
171void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
172 throw NotImplementedException("GLASM instruction");
173}
174
175void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
176 ctx.Add("MOV.F.SAT {}.x,{};", inst, value);
177}
178
179void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
180 throw NotImplementedException("GLASM instruction");
181}
182
183void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value,
184 [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) {
185 throw NotImplementedException("GLASM instruction");
186}
187
188void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
189 ScalarF32 max_value) {
190 Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F");
191}
192
193void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
194 ScalarF64 max_value) {
195 Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64");
196}
197
198void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
199 throw NotImplementedException("GLASM instruction");
200}
201
202void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
203 ctx.Add("ROUND.F {}.x,{};", inst, value);
204}
205
206void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
207 ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value);
208}
209
210void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
211 throw NotImplementedException("GLASM instruction");
212}
213
214void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
215 ctx.Add("FLR.F {}.x,{};", inst, value);
216}
217
218void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
219 ctx.LongAdd("FLR.F64 {}.x,{};", inst, value);
220}
221
222void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
223 throw NotImplementedException("GLASM instruction");
224}
225
226void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
227 ctx.Add("CEIL.F {}.x,{};", inst, value);
228}
229
230void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
231 ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value);
232}
233
234void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
235 throw NotImplementedException("GLASM instruction");
236}
237
238void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
239 ctx.Add("TRUNC.F {}.x,{};", inst, value);
240}
241
242void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
243 ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value);
244}
245
246void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
247 [[maybe_unused]] Register rhs) {
248 throw NotImplementedException("GLASM instruction");
249}
250
251void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
252 Compare(ctx, inst, lhs, rhs, "SEQ", "F", true);
253}
254
255void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
256 Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true);
257}
258
259void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
260 [[maybe_unused]] Register rhs) {
261 throw NotImplementedException("GLASM instruction");
262}
263
264void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
265 Compare(ctx, inst, lhs, rhs, "SEQ", "F", false);
266}
267
268void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
269 Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false);
270}
271
272void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
273 [[maybe_unused]] Register rhs) {
274 throw NotImplementedException("GLASM instruction");
275}
276
277void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
278 Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true);
279}
280
281void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
282 Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true);
283}
284
285void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
286 [[maybe_unused]] Register rhs) {
287 throw NotImplementedException("GLASM instruction");
288}
289
290void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
291 Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true);
292}
293
294void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
295 Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true);
296}
297
298void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
299 [[maybe_unused]] Register rhs) {
300 throw NotImplementedException("GLASM instruction");
301}
302
303void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
304 Compare(ctx, inst, lhs, rhs, "SLT", "F", true);
305}
306
307void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
308 Compare(ctx, inst, lhs, rhs, "SLT", "F64", true);
309}
310
311void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
312 [[maybe_unused]] Register rhs) {
313 throw NotImplementedException("GLASM instruction");
314}
315
316void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
317 Compare(ctx, inst, lhs, rhs, "SLT", "F", false);
318}
319
320void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
321 Compare(ctx, inst, lhs, rhs, "SLT", "F64", false);
322}
323
324void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
325 [[maybe_unused]] Register rhs) {
326 throw NotImplementedException("GLASM instruction");
327}
328
329void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
330 Compare(ctx, inst, lhs, rhs, "SGT", "F", true);
331}
332
333void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
334 Compare(ctx, inst, lhs, rhs, "SGT", "F64", true);
335}
336
337void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
338 [[maybe_unused]] Register rhs) {
339 throw NotImplementedException("GLASM instruction");
340}
341
342void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
343 Compare(ctx, inst, lhs, rhs, "SGT", "F", false);
344}
345
346void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
347 Compare(ctx, inst, lhs, rhs, "SGT", "F64", false);
348}
349
350void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
351 [[maybe_unused]] Register rhs) {
352 throw NotImplementedException("GLASM instruction");
353}
354
355void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
356 Compare(ctx, inst, lhs, rhs, "SLE", "F", true);
357}
358
359void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
360 Compare(ctx, inst, lhs, rhs, "SLE", "F64", true);
361}
362
363void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
364 [[maybe_unused]] Register rhs) {
365 throw NotImplementedException("GLASM instruction");
366}
367
368void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
369 Compare(ctx, inst, lhs, rhs, "SLE", "F", false);
370}
371
372void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
373 Compare(ctx, inst, lhs, rhs, "SLE", "F64", false);
374}
375
376void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
377 [[maybe_unused]] Register rhs) {
378 throw NotImplementedException("GLASM instruction");
379}
380
381void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
382 Compare(ctx, inst, lhs, rhs, "SGE", "F", true);
383}
384
385void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
386 Compare(ctx, inst, lhs, rhs, "SGE", "F64", true);
387}
388
389void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
390 [[maybe_unused]] Register rhs) {
391 throw NotImplementedException("GLASM instruction");
392}
393
394void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
395 Compare(ctx, inst, lhs, rhs, "SGE", "F", false);
396}
397
398void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
399 Compare(ctx, inst, lhs, rhs, "SGE", "F64", false);
400}
401
402void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
403 throw NotImplementedException("GLASM instruction");
404}
405
406void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
407 Compare(ctx, inst, value, value, "SNE", "F", true, false);
408}
409
410void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
411 Compare(ctx, inst, value, value, "SNE", "F64", true, false);
412}
413
414} // namespace Shader::Backend::GLASM
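The Compare helper above has to emulate IEEE ordered/unordered predicates with plain GLASM set instructions: unordered variants OR the raw comparison with per-operand NaN tests, while ordered inequality ANDs it with both operands being non-NaN. A CPU-side sketch of the intended semantics, illustrative only:

// Illustrative only: the NaN behavior the Compare helper encodes, expressed on the CPU.
// Ordered comparisons must be false when either operand is NaN; unordered ones must be true.
#include <cmath>
#include <iostream>

bool OrdLessThan(float lhs, float rhs) {
    // The raw comparison is already false for NaN operands.
    return lhs < rhs;
}

bool UnordLessThan(float lhs, float rhs) {
    // Raw comparison OR'd with the two NaN tests (SNE x,x is true only for NaN).
    return (lhs < rhs) || std::isnan(lhs) || std::isnan(rhs);
}

bool OrdNotEqual(float lhs, float rhs) {
    // Raw inequality AND'd with both operands being non-NaN (SEQ x,x is false only for NaN).
    return (lhs != rhs) && !std::isnan(lhs) && !std::isnan(rhs);
}

int main() {
    const float nan = std::nanf("");
    // Prints: 0 1 0
    std::cout << OrdLessThan(nan, 1.0f) << ' ' << UnordLessThan(nan, 1.0f) << ' '
              << OrdNotEqual(nan, 1.0f) << '\n';
}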
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
new file mode 100644
index 000000000..09e3a9b82
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -0,0 +1,850 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLASM {
13namespace {
14struct ScopedRegister {
15 ScopedRegister() = default;
16 ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
17
18 ~ScopedRegister() {
19 if (reg_alloc) {
20 reg_alloc->FreeReg(reg);
21 }
22 }
23
24 ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
25 if (reg_alloc) {
26 reg_alloc->FreeReg(reg);
27 }
28 reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
29 reg = rhs.reg;
30 return *this;
31 }
32
33 ScopedRegister(ScopedRegister&& rhs) noexcept
34 : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
35
36 ScopedRegister& operator=(const ScopedRegister&) = delete;
37 ScopedRegister(const ScopedRegister&) = delete;
38
39 RegAlloc* reg_alloc{};
40 Register reg;
41};
42
43std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
44 [[maybe_unused]] const IR::Value& index) {
45 // FIXME: indexed reads
46 if (info.type == TextureType::Buffer) {
47 return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
48 } else {
49 return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
50 }
51}
52
53std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
54 [[maybe_unused]] const IR::Value& index) {
55 // FIXME: indexed reads
56 if (info.type == TextureType::Buffer) {
57 return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
58 } else {
59 return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
60 }
61}
62
63std::string_view TextureType(IR::TextureInstInfo info) {
64 if (info.is_depth) {
65 switch (info.type) {
66 case TextureType::Color1D:
67 return "SHADOW1D";
68 case TextureType::ColorArray1D:
69 return "SHADOWARRAY1D";
70 case TextureType::Color2D:
71 return "SHADOW2D";
72 case TextureType::ColorArray2D:
73 return "SHADOWARRAY2D";
74 case TextureType::Color3D:
75 return "SHADOW3D";
76 case TextureType::ColorCube:
77 return "SHADOWCUBE";
78 case TextureType::ColorArrayCube:
79 return "SHADOWARRAYCUBE";
80 case TextureType::Buffer:
81 return "SHADOWBUFFER";
82 }
83 } else {
84 switch (info.type) {
85 case TextureType::Color1D:
86 return "1D";
87 case TextureType::ColorArray1D:
88 return "ARRAY1D";
89 case TextureType::Color2D:
90 return "2D";
91 case TextureType::ColorArray2D:
92 return "ARRAY2D";
93 case TextureType::Color3D:
94 return "3D";
95 case TextureType::ColorCube:
96 return "CUBE";
97 case TextureType::ColorArrayCube:
98 return "ARRAYCUBE";
99 case TextureType::Buffer:
100 return "BUFFER";
101 }
102 }
103 throw InvalidArgument("Invalid texture type {}", info.type.Value());
104}
105
106std::string Offset(EmitContext& ctx, const IR::Value& offset) {
107 if (offset.IsEmpty()) {
108 return "";
109 }
110 return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
111}
112
113std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
114 const IR::Value& offset2) {
115 if (offset2.IsEmpty()) {
116 return {};
117 } else {
118 return {ctx.reg_alloc, ctx.reg_alloc};
119 }
120}
121
122void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
123 const IR::Value& offset2) {
124 const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
125 const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
126 // Input swizzle: [XYXY] [XYXY]
127 // Output swizzle: [XXXX] [YYYY]
128 ctx.Add("MOV {}.x,{}.x;"
129 "MOV {}.y,{}.z;"
130 "MOV {}.z,{}.x;"
131 "MOV {}.w,{}.z;"
132 "MOV {}.x,{}.y;"
133 "MOV {}.y,{}.w;"
134 "MOV {}.z,{}.y;"
135 "MOV {}.w,{}.w;",
136 off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
137 offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
138}
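The swizzle comment above is easier to follow with the shuffle written out: two registers each hold interleaved (x, y) offset pairs, and the outputs gather the four x offsets into one register and the four y offsets into the other. A standalone sketch with made-up offset values:

// Illustrative only: the component shuffle SwizzleOffsets performs, reproduced with arrays.
#include <array>
#include <iostream>

int main() {
    const std::array<int, 4> offsets_a{1, -1, 2, -2}; // (x0,y0,x1,y1), hypothetical values
    const std::array<int, 4> offsets_b{3, -3, 4, -4}; // (x2,y2,x3,y3)

    const std::array<int, 4> off_x{offsets_a[0], offsets_a[2], offsets_b[0], offsets_b[2]};
    const std::array<int, 4> off_y{offsets_a[1], offsets_a[3], offsets_b[1], offsets_b[3]};

    for (const int v : off_x) {
        std::cout << v << ' '; // 1 2 3 4
    }
    std::cout << '\n';
    for (const int v : off_y) {
        std::cout << v << ' '; // -1 -2 -3 -4
    }
    std::cout << '\n';
}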
139
140std::string GradOffset(const IR::Value& offset) {
141 if (offset.IsImmediate()) {
142 LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
143 return "";
144 }
145 IR::Inst* const vector{offset.InstRecursive()};
146 if (!vector->AreAllArgsImmediates()) {
147 LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
148 return "";
149 }
150 switch (vector->NumArgs()) {
151 case 1:
152 return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
153 case 2:
154 return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
155 static_cast<s32>(vector->Arg(1).U32()));
156 default:
157 throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
158 }
159}
160
161std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
162 if (coord.IsImmediate()) {
163 ScopedRegister scoped_reg(ctx.reg_alloc);
164 ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
165 return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
166 }
167 std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
168 if (coord.InstRecursive()->HasUses()) {
169 // The coordinate vector is still used elsewhere, so copy it to RC before modifying it;
170 // this should never happen because vectors are only assembled for immediate texture instructions
171 ctx.Add("MOV.F RC,{};", coord_vec);
172 coord_vec = "RC";
173 }
174 return {std::move(coord_vec), ScopedRegister{}};
175}
176
177void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
178 if (!sparse_inst) {
179 return;
180 }
181 const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
182 ctx.Add("MOV.S {},-1;"
183 "MOV.S {}(NONRESIDENT),0;",
184 sparse_ret, sparse_ret);
185}
186
187std::string_view FormatStorage(ImageFormat format) {
188 switch (format) {
189 case ImageFormat::Typeless:
190 return "U";
191 case ImageFormat::R8_UINT:
192 return "U8";
193 case ImageFormat::R8_SINT:
194 return "S8";
195 case ImageFormat::R16_UINT:
196 return "U16";
197 case ImageFormat::R16_SINT:
198 return "S16";
199 case ImageFormat::R32_UINT:
200 return "U32";
201 case ImageFormat::R32G32_UINT:
202 return "U32X2";
203 case ImageFormat::R32G32B32A32_UINT:
204 return "U32X4";
205 }
206 throw InvalidArgument("Invalid image format {}", format);
207}
208
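// Shared helper for the image atomic emitters: ATOMIM.<op> performs the read-modify-write on the
// bound image with the given coordinate and value, capturing the instruction result in ret
// (returning the previous value is the usual atomic convention and is assumed here, not taken
// from the GLASM documentation).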
209template <typename T>
210void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
211 std::string_view op) {
212 const auto info{inst.Flags<IR::TextureInstInfo>()};
213 const std::string_view type{TextureType(info)};
214 const std::string image{Image(ctx, info, index)};
215 const Register ret{ctx.reg_alloc.Define(inst)};
216 ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
217}
218
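// If a GetSparseFromOp pseudo-op is attached to the instruction, invalidate it so the residency
// result is produced by the texture instruction itself; callers then append .SPARSE to the opcode
// and finish with StoreSparse. Returns nullptr when no sparse result was requested.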
219IR::Inst* PrepareSparse(IR::Inst& inst) {
220 const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
221 if (sparse_inst) {
222 sparse_inst->Invalidate();
223 }
224 return sparse_inst;
225}
226} // Anonymous namespace
227
228void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
229 const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
230 const auto info{inst.Flags<IR::TextureInstInfo>()};
231 const auto sparse_inst{PrepareSparse(inst)};
232 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
233 const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
234 const std::string_view type{TextureType(info)};
235 const std::string texture{Texture(ctx, info, index)};
236 const std::string offset_vec{Offset(ctx, offset)};
237 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
238 const Register ret{ctx.reg_alloc.Define(inst)};
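    // With a bias, TXB is used and the bias (bias_lc.x) is packed into the otherwise unused .w
    // component of the coordinate; ARRAYCUBE needs all four coordinate components, so there the
    // bias stays a separate operand. That TXB reads the bias from .w is an assumption inferred
    // from this packing rather than from the GLASM specification.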
239 if (info.has_bias) {
240 if (info.type == TextureType::ColorArrayCube) {
241 ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
242 bias_lc, texture, offset_vec);
243 } else {
244 if (info.has_lod_clamp) {
245 ctx.Add("MOV.F {}.w,{}.x;"
246 "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
247 coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
248 offset_vec);
249 } else {
250 ctx.Add("MOV.F {}.w,{}.x;"
251 "TXB.F{} {},{},{},{}{};",
252 coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
253 }
254 }
255 } else {
256 if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
257 ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
258 bias_lc, texture, offset_vec);
259 } else {
260 ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
261 type, offset_vec);
262 }
263 }
264 StoreSparse(ctx, sparse_inst);
265}
266
267void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
268 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
269 const auto info{inst.Flags<IR::TextureInstInfo>()};
270 const auto sparse_inst{PrepareSparse(inst)};
271 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
272 const std::string_view type{TextureType(info)};
273 const std::string texture{Texture(ctx, info, index)};
274 const std::string offset_vec{Offset(ctx, offset)};
275 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
276 const Register ret{ctx.reg_alloc.Define(inst)};
277 if (info.type == TextureType::ColorArrayCube) {
278 ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
279 offset_vec);
280 } else {
281 ctx.Add("MOV.F {}.w,{};"
282 "TXL.F{} {},{},{},{}{};",
283 coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
284 }
285 StoreSparse(ctx, sparse_inst);
286}
287
288void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
289 const IR::Value& coord, const IR::Value& dref,
290 const IR::Value& bias_lc, const IR::Value& offset) {
291    // Allocate the ColorArrayCube staging register early so it does not alias consumed registers
292 const auto info{inst.Flags<IR::TextureInstInfo>()};
293 ScopedRegister staging;
294 if (info.type == TextureType::ColorArrayCube) {
295 staging = ScopedRegister{ctx.reg_alloc};
296 }
297 const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
298 const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
299 const auto sparse_inst{PrepareSparse(inst)};
300 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
301 const std::string_view type{TextureType(info)};
302 const std::string texture{Texture(ctx, info, index)};
303 const std::string offset_vec{Offset(ctx, offset)};
304 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
305 const Register ret{ctx.reg_alloc.Define(inst)};
306 if (info.has_bias) {
307 if (info.has_lod_clamp) {
308 switch (info.type) {
309 case TextureType::Color1D:
310 case TextureType::ColorArray1D:
311 case TextureType::Color2D:
312 ctx.Add("MOV.F {}.z,{};"
313 "MOV.F {}.w,{}.x;"
314 "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
315 coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
316 bias_lc_vec, texture, type, offset_vec);
317 break;
318 case TextureType::ColorArray2D:
319 case TextureType::ColorCube:
320 ctx.Add("MOV.F {}.w,{};"
321 "TXB.F.LODCLAMP{} {},{},{},{},{}{};",
322 coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
323 offset_vec);
324 break;
325 default:
326 throw NotImplementedException("Invalid type {} with bias and lod clamp",
327 info.type.Value());
328 }
329 } else {
330 switch (info.type) {
331 case TextureType::Color1D:
332 case TextureType::ColorArray1D:
333 case TextureType::Color2D:
334 ctx.Add("MOV.F {}.z,{};"
335 "MOV.F {}.w,{}.x;"
336 "TXB.F{} {},{},{},{}{};",
337 coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
338 texture, type, offset_vec);
339 break;
340 case TextureType::ColorArray2D:
341 case TextureType::ColorCube:
342 ctx.Add("MOV.F {}.w,{};"
343 "TXB.F{} {},{},{},{},{}{};",
344 coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
345 offset_vec);
346 break;
347 case TextureType::ColorArrayCube:
348 ctx.Add("MOV.F {}.x,{};"
349 "MOV.F {}.y,{}.x;"
350 "TXB.F{} {},{},{},{},{}{};",
351 staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
352 staging.reg, texture, type, offset_vec);
353 break;
354 default:
355 throw NotImplementedException("Invalid type {}", info.type.Value());
356 }
357 }
358 } else {
359 if (info.has_lod_clamp) {
360 if (info.type != TextureType::ColorArrayCube) {
361 const bool w_swizzle{info.type == TextureType::ColorArray2D ||
362 info.type == TextureType::ColorCube};
363 const char dref_swizzle{w_swizzle ? 'w' : 'z'};
364 ctx.Add("MOV.F {}.{},{};"
365 "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
366 coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
367 texture, type, offset_vec);
368 } else {
369 ctx.Add("MOV.F {}.x,{};"
370 "MOV.F {}.y,{};"
371 "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
372 staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
373 staging.reg, texture, type, offset_vec);
374 }
375 } else {
376 if (info.type != TextureType::ColorArrayCube) {
377 const bool w_swizzle{info.type == TextureType::ColorArray2D ||
378 info.type == TextureType::ColorCube};
379 const char dref_swizzle{w_swizzle ? 'w' : 'z'};
380 ctx.Add("MOV.F {}.{},{};"
381 "TEX.F{} {},{},{},{}{};",
382 coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
383 type, offset_vec);
384 } else {
385 ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
386 type, offset_vec);
387 }
388 }
389 }
390 StoreSparse(ctx, sparse_inst);
391}
392
393void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
394 const IR::Value& coord, const IR::Value& dref,
395 const IR::Value& lod, const IR::Value& offset) {
396    // Allocate the ColorArrayCube staging register early so it does not alias consumed registers
397 const auto info{inst.Flags<IR::TextureInstInfo>()};
398 ScopedRegister staging;
399 if (info.type == TextureType::ColorArrayCube) {
400 staging = ScopedRegister{ctx.reg_alloc};
401 }
402 const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
403 const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
404 const auto sparse_inst{PrepareSparse(inst)};
405 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
406 const std::string_view type{TextureType(info)};
407 const std::string texture{Texture(ctx, info, index)};
408 const std::string offset_vec{Offset(ctx, offset)};
409 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
410 const Register ret{ctx.reg_alloc.Define(inst)};
411 switch (info.type) {
412 case TextureType::Color1D:
413 case TextureType::ColorArray1D:
414 case TextureType::Color2D:
415 ctx.Add("MOV.F {}.z,{};"
416 "MOV.F {}.w,{};"
417 "TXL.F{} {},{},{},{}{};",
418 coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
419 offset_vec);
420 break;
421 case TextureType::ColorArray2D:
422 case TextureType::ColorCube:
423 ctx.Add("MOV.F {}.w,{};"
424 "TXL.F{} {},{},{},{},{}{};",
425 coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
426 offset_vec);
427 break;
428 case TextureType::ColorArrayCube:
429 ctx.Add("MOV.F {}.x,{};"
430 "MOV.F {}.y,{};"
431 "TXL.F{} {},{},{},{},{}{};",
432 staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
433 staging.reg, texture, type, offset_vec);
434 break;
435 default:
436 throw NotImplementedException("Invalid type {}", info.type.Value());
437 }
438 StoreSparse(ctx, sparse_inst);
439}
440
441void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
442 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
443 // Allocate offsets early so they don't overwrite any consumed register
444 const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
445 const auto info{inst.Flags<IR::TextureInstInfo>()};
446 const char comp{"xyzw"[info.gather_component]};
447 const auto sparse_inst{PrepareSparse(inst)};
448 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
449 const std::string_view type{TextureType(info)};
450 const std::string texture{Texture(ctx, info, index)};
451 const Register coord_vec{ctx.reg_alloc.Consume(coord)};
452 const Register ret{ctx.reg_alloc.Define(inst)};
453 if (offset2.IsEmpty()) {
454 const std::string offset_vec{Offset(ctx, offset)};
455 ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
456 offset_vec);
457 } else {
458 SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
459 ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
460 texture, comp, type);
461 }
462 StoreSparse(ctx, sparse_inst);
463}
464
465void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
466 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
467 const IR::Value& dref) {
468 // FIXME: This instruction is not working as expected
469
470 // Allocate offsets early so they don't overwrite any consumed register
471 const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
472 const auto info{inst.Flags<IR::TextureInstInfo>()};
473 const auto sparse_inst{PrepareSparse(inst)};
474 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
475 const std::string_view type{TextureType(info)};
476 const std::string texture{Texture(ctx, info, index)};
477 const Register coord_vec{ctx.reg_alloc.Consume(coord)};
478 const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
479 const Register ret{ctx.reg_alloc.Define(inst)};
480 std::string args;
481 switch (info.type) {
482 case TextureType::Color2D:
483 ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
484 args = fmt::to_string(coord_vec);
485 break;
486 case TextureType::ColorArray2D:
487 case TextureType::ColorCube:
488 ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
489 args = fmt::to_string(coord_vec);
490 break;
491 case TextureType::ColorArrayCube:
492 args = fmt::format("{},{}", coord_vec, dref_value);
493 break;
494 default:
495 throw NotImplementedException("Invalid type {}", info.type.Value());
496 }
497 if (offset2.IsEmpty()) {
498 const std::string offset_vec{Offset(ctx, offset)};
499 ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
500 } else {
501 SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
502 ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
503 type);
504 }
505 StoreSparse(ctx, sparse_inst);
506}
507
508void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
509 const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
510 const auto info{inst.Flags<IR::TextureInstInfo>()};
511 const auto sparse_inst{PrepareSparse(inst)};
512 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
513 const std::string_view type{TextureType(info)};
514 const std::string texture{Texture(ctx, info, index)};
515 const std::string offset_vec{Offset(ctx, offset)};
516 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
517 const Register ret{ctx.reg_alloc.Define(inst)};
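    // Buffer fetches pass the coordinate through unchanged; for other texture types the LOD, or
    // the sample index when fetching a multisampled image via TXFMS, is packed into the
    // coordinate's .w component before the fetch.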
518 if (info.type == TextureType::Buffer) {
519 ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
520 } else if (ms.type != Type::Void) {
521 ctx.Add("MOV.S {}.w,{};"
522 "TXFMS.F{} {},{},{},{}{};",
523 coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
524 } else {
525 ctx.Add("MOV.S {}.w,{};"
526 "TXF.F{} {},{},{},{}{};",
527 coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
528 }
529 StoreSparse(ctx, sparse_inst);
530}
531
532void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
533 ScalarS32 lod) {
534 const auto info{inst.Flags<IR::TextureInstInfo>()};
535 const std::string texture{Texture(ctx, info, index)};
536 const std::string_view type{TextureType(info)};
537 ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
538}
539
540void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
541 const auto info{inst.Flags<IR::TextureInstInfo>()};
542 const std::string texture{Texture(ctx, info, index)};
543 const std::string_view type{TextureType(info)};
544 ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
545}
546
547void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
548 const IR::Value& coord, const IR::Value& derivatives,
549 const IR::Value& offset, const IR::Value& lod_clamp) {
550 const auto info{inst.Flags<IR::TextureInstInfo>()};
551 ScopedRegister dpdx, dpdy;
552 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
553 if (multi_component) {
554 // Allocate this early to avoid aliasing other registers
555 dpdx = ScopedRegister{ctx.reg_alloc};
556 dpdy = ScopedRegister{ctx.reg_alloc};
557 }
558 const auto sparse_inst{PrepareSparse(inst)};
559 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
560 const std::string_view type{TextureType(info)};
561 const std::string texture{Texture(ctx, info, index)};
562 const std::string offset_vec{GradOffset(offset)};
563 const Register coord_vec{ctx.reg_alloc.Consume(coord)};
564 const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
565 const Register ret{ctx.reg_alloc.Define(inst)};
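    // The derivatives register interleaves the X and Y gradients as (dPdx.x, dPdy.x, dPdx.y,
    // dPdy.y), as read from the MOVs below, so it is de-interleaved into dpdx/dpdy for TXD; when
    // a LOD clamp is present its value additionally rides in dpdy.w.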
566 if (multi_component) {
567 ctx.Add("MOV.F {}.x,{}.x;"
568 "MOV.F {}.y,{}.z;"
569 "MOV.F {}.x,{}.y;"
570 "MOV.F {}.y,{}.w;",
571 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
572 dpdy.reg, derivatives_vec);
573 if (info.has_lod_clamp) {
574 const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
575 ctx.Add("MOV.F {}.w,{};"
576 "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
577 dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
578 texture, type, offset_vec);
579 } else {
580 ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
581 texture, type, offset_vec);
582 }
583 } else {
584 ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
585 derivatives_vec, texture, type, offset_vec);
586 }
587 StoreSparse(ctx, sparse_inst);
588}
589
590void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
591 const auto info{inst.Flags<IR::TextureInstInfo>()};
592 const auto sparse_inst{PrepareSparse(inst)};
593 const std::string_view format{FormatStorage(info.image_format)};
594 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
595 const std::string_view type{TextureType(info)};
596 const std::string image{Image(ctx, info, index)};
597 const Register ret{ctx.reg_alloc.Define(inst)};
598 ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
599 StoreSparse(ctx, sparse_inst);
600}
601
602void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
603 Register color) {
604 const auto info{inst.Flags<IR::TextureInstInfo>()};
605 const std::string_view format{FormatStorage(info.image_format)};
606 const std::string_view type{TextureType(info)};
607 const std::string image{Image(ctx, info, index)};
608 ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
609}
610
611void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
612 ScalarU32 value) {
613 ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
614}
615
616void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
617 ScalarS32 value) {
618 ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
619}
620
621void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
622 ScalarU32 value) {
623 ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
624}
625
626void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
627 ScalarS32 value) {
628 ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
629}
630
631void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
632 ScalarU32 value) {
633 ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
634}
635
636void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
637 ScalarU32 value) {
638 ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
639}
640
641void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
642 ScalarU32 value) {
643 ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
644}
645
646void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
647 ScalarU32 value) {
648 ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
649}
650
651void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
652 ScalarU32 value) {
653 ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
654}
655
656void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
657 ScalarU32 value) {
658 ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
659}
660
661void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
662 Register coord, ScalarU32 value) {
663 ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
664}
665
666void EmitBindlessImageSampleImplicitLod(EmitContext&) {
667 throw LogicError("Unreachable instruction");
668}
669
670void EmitBindlessImageSampleExplicitLod(EmitContext&) {
671 throw LogicError("Unreachable instruction");
672}
673
674void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
675 throw LogicError("Unreachable instruction");
676}
677
678void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
679 throw LogicError("Unreachable instruction");
680}
681
682void EmitBindlessImageGather(EmitContext&) {
683 throw LogicError("Unreachable instruction");
684}
685
686void EmitBindlessImageGatherDref(EmitContext&) {
687 throw LogicError("Unreachable instruction");
688}
689
690void EmitBindlessImageFetch(EmitContext&) {
691 throw LogicError("Unreachable instruction");
692}
693
694void EmitBindlessImageQueryDimensions(EmitContext&) {
695 throw LogicError("Unreachable instruction");
696}
697
698void EmitBindlessImageQueryLod(EmitContext&) {
699 throw LogicError("Unreachable instruction");
700}
701
702void EmitBindlessImageGradient(EmitContext&) {
703 throw LogicError("Unreachable instruction");
704}
705
706void EmitBindlessImageRead(EmitContext&) {
707 throw LogicError("Unreachable instruction");
708}
709
710void EmitBindlessImageWrite(EmitContext&) {
711 throw LogicError("Unreachable instruction");
712}
713
714void EmitBoundImageSampleImplicitLod(EmitContext&) {
715 throw LogicError("Unreachable instruction");
716}
717
718void EmitBoundImageSampleExplicitLod(EmitContext&) {
719 throw LogicError("Unreachable instruction");
720}
721
722void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
723 throw LogicError("Unreachable instruction");
724}
725
726void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
727 throw LogicError("Unreachable instruction");
728}
729
730void EmitBoundImageGather(EmitContext&) {
731 throw LogicError("Unreachable instruction");
732}
733
734void EmitBoundImageGatherDref(EmitContext&) {
735 throw LogicError("Unreachable instruction");
736}
737
738void EmitBoundImageFetch(EmitContext&) {
739 throw LogicError("Unreachable instruction");
740}
741
742void EmitBoundImageQueryDimensions(EmitContext&) {
743 throw LogicError("Unreachable instruction");
744}
745
746void EmitBoundImageQueryLod(EmitContext&) {
747 throw LogicError("Unreachable instruction");
748}
749
750void EmitBoundImageGradient(EmitContext&) {
751 throw LogicError("Unreachable instruction");
752}
753
754void EmitBoundImageRead(EmitContext&) {
755 throw LogicError("Unreachable instruction");
756}
757
758void EmitBoundImageWrite(EmitContext&) {
759 throw LogicError("Unreachable instruction");
760}
761
762void EmitBindlessImageAtomicIAdd32(EmitContext&) {
763 throw LogicError("Unreachable instruction");
764}
765
766void EmitBindlessImageAtomicSMin32(EmitContext&) {
767 throw LogicError("Unreachable instruction");
768}
769
770void EmitBindlessImageAtomicUMin32(EmitContext&) {
771 throw LogicError("Unreachable instruction");
772}
773
774void EmitBindlessImageAtomicSMax32(EmitContext&) {
775 throw LogicError("Unreachable instruction");
776}
777
778void EmitBindlessImageAtomicUMax32(EmitContext&) {
779 throw LogicError("Unreachable instruction");
780}
781
782void EmitBindlessImageAtomicInc32(EmitContext&) {
783 throw LogicError("Unreachable instruction");
784}
785
786void EmitBindlessImageAtomicDec32(EmitContext&) {
787 throw LogicError("Unreachable instruction");
788}
789
790void EmitBindlessImageAtomicAnd32(EmitContext&) {
791 throw LogicError("Unreachable instruction");
792}
793
794void EmitBindlessImageAtomicOr32(EmitContext&) {
795 throw LogicError("Unreachable instruction");
796}
797
798void EmitBindlessImageAtomicXor32(EmitContext&) {
799 throw LogicError("Unreachable instruction");
800}
801
802void EmitBindlessImageAtomicExchange32(EmitContext&) {
803 throw LogicError("Unreachable instruction");
804}
805
806void EmitBoundImageAtomicIAdd32(EmitContext&) {
807 throw LogicError("Unreachable instruction");
808}
809
810void EmitBoundImageAtomicSMin32(EmitContext&) {
811 throw LogicError("Unreachable instruction");
812}
813
814void EmitBoundImageAtomicUMin32(EmitContext&) {
815 throw LogicError("Unreachable instruction");
816}
817
818void EmitBoundImageAtomicSMax32(EmitContext&) {
819 throw LogicError("Unreachable instruction");
820}
821
822void EmitBoundImageAtomicUMax32(EmitContext&) {
823 throw LogicError("Unreachable instruction");
824}
825
826void EmitBoundImageAtomicInc32(EmitContext&) {
827 throw LogicError("Unreachable instruction");
828}
829
830void EmitBoundImageAtomicDec32(EmitContext&) {
831 throw LogicError("Unreachable instruction");
832}
833
834void EmitBoundImageAtomicAnd32(EmitContext&) {
835 throw LogicError("Unreachable instruction");
836}
837
838void EmitBoundImageAtomicOr32(EmitContext&) {
839 throw LogicError("Unreachable instruction");
840}
841
842void EmitBoundImageAtomicXor32(EmitContext&) {
843 throw LogicError("Unreachable instruction");
844}
845
846void EmitBoundImageAtomicExchange32(EmitContext&) {
847 throw LogicError("Unreachable instruction");
848}
849
850} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
new file mode 100644
index 000000000..12afda43b
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -0,0 +1,625 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/backend/glasm/reg_alloc.h"
9
10namespace Shader::IR {
11enum class Attribute : u64;
12enum class Patch : u64;
13class Inst;
14class Value;
15} // namespace Shader::IR
16
17namespace Shader::Backend::GLASM {
18
19class EmitContext;
20
21// Microinstruction emitters
22void EmitPhi(EmitContext& ctx, IR::Inst& inst);
23void EmitVoid(EmitContext& ctx);
24void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
25void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
26void EmitReference(EmitContext&, const IR::Value& value);
27void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
28void EmitJoin(EmitContext& ctx);
29void EmitDemoteToHelperInvocation(EmitContext& ctx);
30void EmitBarrier(EmitContext& ctx);
31void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
32void EmitDeviceMemoryBarrier(EmitContext& ctx);
33void EmitPrologue(EmitContext& ctx);
34void EmitEpilogue(EmitContext& ctx);
35void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream);
36void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
37void EmitGetRegister(EmitContext& ctx);
38void EmitSetRegister(EmitContext& ctx);
39void EmitGetPred(EmitContext& ctx);
40void EmitSetPred(EmitContext& ctx);
41void EmitSetGotoVariable(EmitContext& ctx);
42void EmitGetGotoVariable(EmitContext& ctx);
43void EmitSetIndirectBranchVariable(EmitContext& ctx);
44void EmitGetIndirectBranchVariable(EmitContext& ctx);
45void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
46void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
47void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
48void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
49void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
50void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
51void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
52void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
53void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
54void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
55void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
56void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
57void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
58void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value);
59void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value);
60void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value);
61void EmitGetZFlag(EmitContext& ctx);
62void EmitGetSFlag(EmitContext& ctx);
63void EmitGetCFlag(EmitContext& ctx);
64void EmitGetOFlag(EmitContext& ctx);
65void EmitSetZFlag(EmitContext& ctx);
66void EmitSetSFlag(EmitContext& ctx);
67void EmitSetCFlag(EmitContext& ctx);
68void EmitSetOFlag(EmitContext& ctx);
69void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
70void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
71void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
72void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
73void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
74void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
75void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
76void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
77void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
78void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
79void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
80void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
81void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
82void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address);
83void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address);
84void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address);
85void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address);
86void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address);
87void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address);
88void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address);
89void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value);
90void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value);
91void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value);
92void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value);
93void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value);
94void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value);
95void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value);
96void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
97 ScalarU32 offset);
98void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
99 ScalarU32 offset);
100void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
101 ScalarU32 offset);
102void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
103 ScalarU32 offset);
104void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
105 ScalarU32 offset);
106void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
107 ScalarU32 offset);
108void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
109 ScalarU32 offset);
110void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
111 ScalarU32 value);
112void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
113 ScalarS32 value);
114void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
115 ScalarU32 value);
116void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
117 ScalarS32 value);
118void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
119 ScalarU32 value);
120void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
121 Register value);
122void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
123 Register value);
124void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
125void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
126void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
127void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
128void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
129void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
130void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
131void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
132void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
133void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
134void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value);
135void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value);
136void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
137 const IR::Value& e2);
138void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
139 const IR::Value& e2, const IR::Value& e3);
140void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
141 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
142void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
143void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
144void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
145void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
146void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
147void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
148void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2);
149void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3);
150void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3,
151 Register e4);
152void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index);
153void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index);
154void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
155void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
156void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
157void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
158void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
159 const IR::Value& e2);
160void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
161 const IR::Value& e2, const IR::Value& e3);
162void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
163 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
164void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
165void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
166void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
167void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
168 ScalarF32 object, u32 index);
169void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
170 ScalarF32 object, u32 index);
171void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
172 ScalarF32 object, u32 index);
173void EmitCompositeConstructF64x2(EmitContext& ctx);
174void EmitCompositeConstructF64x3(EmitContext& ctx);
175void EmitCompositeConstructF64x4(EmitContext& ctx);
176void EmitCompositeExtractF64x2(EmitContext& ctx);
177void EmitCompositeExtractF64x3(EmitContext& ctx);
178void EmitCompositeExtractF64x4(EmitContext& ctx);
179void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index);
180void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index);
181void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index);
182void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
183 ScalarS32 false_value);
184void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
185void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
186void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
187 ScalarS32 false_value);
188void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
189 Register false_value);
190void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
191void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
192 ScalarS32 false_value);
193void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
194void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
195void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
196void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
197void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
198void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
199void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
200void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
201void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
202void EmitPackFloat2x16(EmitContext& ctx, Register value);
203void EmitUnpackFloat2x16(EmitContext& ctx, Register value);
204void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
205void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
206void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
207void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
208void EmitGetZeroFromOp(EmitContext& ctx);
209void EmitGetSignFromOp(EmitContext& ctx);
210void EmitGetCarryFromOp(EmitContext& ctx);
211void EmitGetOverflowFromOp(EmitContext& ctx);
212void EmitGetSparseFromOp(EmitContext& ctx);
213void EmitGetInBoundsFromOp(EmitContext& ctx);
214void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value);
215void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
216void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
217void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
218void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
219void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
220void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c);
221void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c);
222void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c);
223void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
224void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
225void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
226void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
227void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
228void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
229void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
230void EmitFPNeg16(EmitContext& ctx, Register value);
231void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value);
232void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value);
233void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
234void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
235void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
236void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
237void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
238void EmitFPRecip64(EmitContext& ctx, Register value);
239void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
240void EmitFPRecipSqrt64(EmitContext& ctx, Register value);
241void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
242void EmitFPSaturate16(EmitContext& ctx, Register value);
243void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
244void EmitFPSaturate64(EmitContext& ctx, Register value);
245void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value);
246void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
247 ScalarF32 max_value);
248void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
249 ScalarF64 max_value);
250void EmitFPRoundEven16(EmitContext& ctx, Register value);
251void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
252void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
253void EmitFPFloor16(EmitContext& ctx, Register value);
254void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
255void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
256void EmitFPCeil16(EmitContext& ctx, Register value);
257void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
258void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
259void EmitFPTrunc16(EmitContext& ctx, Register value);
260void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
261void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
262void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs);
263void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
264void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
265void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs);
266void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
267void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
268void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
269void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
270void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
271void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
272void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
273void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
274void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs);
275void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
276void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
277void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs);
278void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
279void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
280void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
281void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
282void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
283void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
284void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
285void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
286void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
287void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
288void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
289void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
290void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
291void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
292void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
293void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
294void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
295void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
296void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
297void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
298void EmitFPIsNan16(EmitContext& ctx, Register value);
299void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
300void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
301void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
302void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
303void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
304void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
305void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
306void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
307void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
308void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
309void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
310void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift);
311void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
312void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
313 ScalarU32 shift);
314void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift);
315void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
316 ScalarS32 shift);
317void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
318void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
319void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
320void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
321 ScalarS32 offset, ScalarS32 count);
322void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
323 ScalarS32 count);
324void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
325 ScalarU32 count);
326void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
327void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
328void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
329void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
330void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
331void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
332void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
333void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
334void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
335void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max);
336void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max);
337void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
338void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
339void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
340void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
341void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
342void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
343void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
344void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
345void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
346void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
347void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
348 ScalarU32 value);
349void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
350 ScalarS32 value);
351void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
352 ScalarU32 value);
353void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
354 ScalarS32 value);
355void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
356 ScalarU32 value);
357void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
358 ScalarU32 value);
359void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
360 ScalarU32 value);
361void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
362 ScalarU32 value);
363void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
364 ScalarU32 value);
365void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
366 ScalarU32 value);
367void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
368 ScalarU32 value);
369void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
370 Register value);
371void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
372 ScalarU32 offset, ScalarU32 value);
373void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
374 ScalarU32 offset, ScalarS32 value);
375void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
376 ScalarU32 offset, ScalarU32 value);
377void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
378 ScalarU32 offset, ScalarS32 value);
379void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
380 ScalarU32 offset, ScalarU32 value);
381void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
382 ScalarU32 offset, ScalarU32 value);
383void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
384 ScalarU32 offset, ScalarU32 value);
385void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
386 ScalarU32 offset, ScalarU32 value);
387void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
388 ScalarU32 offset, ScalarU32 value);
389void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
390 ScalarU32 offset, ScalarU32 value);
391void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
392 ScalarU32 offset, ScalarU32 value);
393void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
394 ScalarU32 offset, Register value);
395void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
396 ScalarU32 offset, Register value);
397void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
398 ScalarU32 offset, Register value);
399void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
400 ScalarU32 offset, Register value);
401void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
402 ScalarU32 offset, Register value);
403void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
404 ScalarU32 offset, Register value);
405void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
406 ScalarU32 offset, Register value);
407void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
408 ScalarU32 offset, Register value);
409void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
410 ScalarU32 offset, Register value);
411void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
412 ScalarU32 offset, ScalarF32 value);
413void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
414 ScalarU32 offset, Register value);
415void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
416 ScalarU32 offset, Register value);
417void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
418 ScalarU32 offset, Register value);
419void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
420 ScalarU32 offset, Register value);
421void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
422 ScalarU32 offset, Register value);
423void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
424 ScalarU32 offset, Register value);
425void EmitGlobalAtomicIAdd32(EmitContext& ctx);
426void EmitGlobalAtomicSMin32(EmitContext& ctx);
427void EmitGlobalAtomicUMin32(EmitContext& ctx);
428void EmitGlobalAtomicSMax32(EmitContext& ctx);
429void EmitGlobalAtomicUMax32(EmitContext& ctx);
430void EmitGlobalAtomicInc32(EmitContext& ctx);
431void EmitGlobalAtomicDec32(EmitContext& ctx);
432void EmitGlobalAtomicAnd32(EmitContext& ctx);
433void EmitGlobalAtomicOr32(EmitContext& ctx);
434void EmitGlobalAtomicXor32(EmitContext& ctx);
435void EmitGlobalAtomicExchange32(EmitContext& ctx);
436void EmitGlobalAtomicIAdd64(EmitContext& ctx);
437void EmitGlobalAtomicSMin64(EmitContext& ctx);
438void EmitGlobalAtomicUMin64(EmitContext& ctx);
439void EmitGlobalAtomicSMax64(EmitContext& ctx);
440void EmitGlobalAtomicUMax64(EmitContext& ctx);
441void EmitGlobalAtomicInc64(EmitContext& ctx);
442void EmitGlobalAtomicDec64(EmitContext& ctx);
443void EmitGlobalAtomicAnd64(EmitContext& ctx);
444void EmitGlobalAtomicOr64(EmitContext& ctx);
445void EmitGlobalAtomicXor64(EmitContext& ctx);
446void EmitGlobalAtomicExchange64(EmitContext& ctx);
447void EmitGlobalAtomicAddF32(EmitContext& ctx);
448void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
449void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
450void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
451void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
452void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
453void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
454void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
455void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
456void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
457void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
458void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value);
459void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
460void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
461void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value);
462void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
463void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
464void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value);
465void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
466void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
467void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value);
468void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
469void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
470void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value);
471void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
472void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
473void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value);
474void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
475void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
476void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
477void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value);
478void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
479void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value);
480void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
481void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
482void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value);
483void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value);
484void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
485void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value);
486void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value);
487void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value);
488void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
489void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value);
490void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value);
491void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value);
492void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
493void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value);
494void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value);
495void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value);
496void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
497void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value);
498void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value);
499void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value);
500void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
501void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value);
502void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value);
503void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value);
504void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
505void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value);
506void EmitBindlessImageSampleImplicitLod(EmitContext&);
507void EmitBindlessImageSampleExplicitLod(EmitContext&);
508void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
509void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
510void EmitBindlessImageGather(EmitContext&);
511void EmitBindlessImageGatherDref(EmitContext&);
512void EmitBindlessImageFetch(EmitContext&);
513void EmitBindlessImageQueryDimensions(EmitContext&);
514void EmitBindlessImageQueryLod(EmitContext&);
515void EmitBindlessImageGradient(EmitContext&);
516void EmitBindlessImageRead(EmitContext&);
517void EmitBindlessImageWrite(EmitContext&);
518void EmitBoundImageSampleImplicitLod(EmitContext&);
519void EmitBoundImageSampleExplicitLod(EmitContext&);
520void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
521void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
522void EmitBoundImageGather(EmitContext&);
523void EmitBoundImageGatherDref(EmitContext&);
524void EmitBoundImageFetch(EmitContext&);
525void EmitBoundImageQueryDimensions(EmitContext&);
526void EmitBoundImageQueryLod(EmitContext&);
527void EmitBoundImageGradient(EmitContext&);
528void EmitBoundImageRead(EmitContext&);
529void EmitBoundImageWrite(EmitContext&);
530void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
531 const IR::Value& coord, Register bias_lc, const IR::Value& offset);
532void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
533 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset);
534void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
535 const IR::Value& coord, const IR::Value& dref,
536 const IR::Value& bias_lc, const IR::Value& offset);
537void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
538 const IR::Value& coord, const IR::Value& dref,
539 const IR::Value& lod, const IR::Value& offset);
540void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
541 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2);
542void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
543 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
544 const IR::Value& dref);
545void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
546 const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
547void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
548 ScalarS32 lod);
549void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
550void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
551 const IR::Value& coord, const IR::Value& derivatives,
552 const IR::Value& offset, const IR::Value& lod_clamp);
553void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
554void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
555 Register color);
556void EmitBindlessImageAtomicIAdd32(EmitContext&);
557void EmitBindlessImageAtomicSMin32(EmitContext&);
558void EmitBindlessImageAtomicUMin32(EmitContext&);
559void EmitBindlessImageAtomicSMax32(EmitContext&);
560void EmitBindlessImageAtomicUMax32(EmitContext&);
561void EmitBindlessImageAtomicInc32(EmitContext&);
562void EmitBindlessImageAtomicDec32(EmitContext&);
563void EmitBindlessImageAtomicAnd32(EmitContext&);
564void EmitBindlessImageAtomicOr32(EmitContext&);
565void EmitBindlessImageAtomicXor32(EmitContext&);
566void EmitBindlessImageAtomicExchange32(EmitContext&);
567void EmitBoundImageAtomicIAdd32(EmitContext&);
568void EmitBoundImageAtomicSMin32(EmitContext&);
569void EmitBoundImageAtomicUMin32(EmitContext&);
570void EmitBoundImageAtomicSMax32(EmitContext&);
571void EmitBoundImageAtomicUMax32(EmitContext&);
572void EmitBoundImageAtomicInc32(EmitContext&);
573void EmitBoundImageAtomicDec32(EmitContext&);
574void EmitBoundImageAtomicAnd32(EmitContext&);
575void EmitBoundImageAtomicOr32(EmitContext&);
576void EmitBoundImageAtomicXor32(EmitContext&);
577void EmitBoundImageAtomicExchange32(EmitContext&);
578void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
579 ScalarU32 value);
580void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
581 ScalarS32 value);
582void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
583 ScalarU32 value);
584void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
585 ScalarS32 value);
586void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
587 ScalarU32 value);
588void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
589 ScalarU32 value);
590void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
591 ScalarU32 value);
592void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
593 ScalarU32 value);
594void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
595 ScalarU32 value);
596void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
597 ScalarU32 value);
598void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
599 Register coord, ScalarU32 value);
600void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
601void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
602void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
603void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
604void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
605void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
606void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
607void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
608void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
609void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
610void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
611 const IR::Value& clamp, const IR::Value& segmentation_mask);
612void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
613 const IR::Value& clamp, const IR::Value& segmentation_mask);
614void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
615 const IR::Value& clamp, const IR::Value& segmentation_mask);
616void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
617 const IR::Value& clamp, const IR::Value& segmentation_mask);
618void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
619 ScalarU32 swizzle);
620void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
621void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
622void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
623void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
624
625} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
new file mode 100644
index 000000000..f55c26b76
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -0,0 +1,294 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLASM {
10namespace {
11void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b,
12 std::string_view lop) {
13 const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
14 const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
15 if (zero) {
16 zero->Invalidate();
17 }
18 if (sign) {
19 sign->Invalidate();
20 }
21 if (zero || sign) {
22 ctx.reg_alloc.InvalidateConditionCodes();
23 }
24 const auto ret{ctx.reg_alloc.Define(inst)};
25 ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b);
26 if (zero) {
27 ctx.Add("SEQ.S {},{},0;", *zero, ret);
28 }
29 if (sign) {
30 ctx.Add("SLT.S {},{},0;", *sign, ret);
31 }
32}
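// As a rough illustration, a bitwise AND whose zero flag is consumed expands
// through the helper above into GLASM along these lines (register names are
// placeholders, not a real allocation):
//   AND.S R0.x,R1.x,R2.x;
//   SEQ.S R3,R0,0;
// An SLT.S against zero is appended in the same way when the sign flag is read.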
33} // Anonymous namespace
34
35void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
36 const std::array flags{
37 inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
38 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
39 inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
40 inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
41 };
42 for (IR::Inst* const flag_inst : flags) {
43 if (flag_inst) {
44 flag_inst->Invalidate();
45 }
46 }
47 const bool cc{inst.HasAssociatedPseudoOperation()};
48 const std::string_view cc_mod{cc ? ".CC" : ""};
49 if (cc) {
50 ctx.reg_alloc.InvalidateConditionCodes();
51 }
52 const auto ret{ctx.reg_alloc.Define(inst)};
53 ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b);
54 if (!cc) {
55 return;
56 }
57 static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
58 for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
59 if (!flags[flag_index]) {
60 continue;
61 }
62 const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
63 if (flag_index == 0) {
64 ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
65 } else {
66 // We could use conditional execution here, but it's broken on Nvidia's compiler
67 ctx.Add("IF {}.x;"
68 "MOV.S {}.x,-1;"
69 "ELSE;"
70 "MOV.S {}.x,0;"
71 "ENDIF;",
72 masks[flag_index], flag_ret, flag_ret);
73 }
74 }
75}
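// For example, an IAdd32 whose carry flag is read back would emit something
// like the following (placeholder registers; CF comes from the .CC update):
//   ADD.S.CC R0.x,R1.x,R2.x;
//   IF CF.x;
//   MOV.S R3.x,-1;
//   ELSE;
//   MOV.S R3.x,0;
//   ENDIF;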
76
77void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
78 ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b);
79}
80
81void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
82 ctx.Add("SUB.S {}.x,{},{};", inst, a, b);
83}
84
85void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
86 ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b);
87}
88
89void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
90 ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
91}
92
93void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
94 if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
95 ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
96 } else {
97 ctx.Add("MOV.S {},-{};", inst, value);
98 }
99}
100
101void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
102 ctx.LongAdd("MOV.S64 {},-{};", inst, value);
103}
104
105void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
106 ctx.Add("ABS.S {},{};", inst, value);
107}
108
109void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
110 ctx.Add("SHL.U {}.x,{},{};", inst, base, shift);
111}
112
113void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
114 ScalarU32 shift) {
115 ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift);
116}
117
118void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
119 ctx.Add("SHR.U {}.x,{},{};", inst, base, shift);
120}
121
122void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
123 ScalarU32 shift) {
124 ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift);
125}
126
127void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) {
128 ctx.Add("SHR.S {}.x,{},{};", inst, base, shift);
129}
130
131void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
132 ScalarS32 shift) {
133 ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift);
134}
135
136void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
137 BitwiseLogicalOp(ctx, inst, a, b, "AND");
138}
139
140void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
141 BitwiseLogicalOp(ctx, inst, a, b, "OR");
142}
143
144void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
145 BitwiseLogicalOp(ctx, inst, a, b, "XOR");
146}
147
148void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
149 ScalarS32 offset, ScalarS32 count) {
150 const Register ret{ctx.reg_alloc.Define(inst)};
151 if (count.type != Type::Register && offset.type != Type::Register) {
152 ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
153 } else {
154 ctx.Add("MOV.S RC.x,{};"
155 "MOV.S RC.y,{};"
156 "BFI.S {},RC,{},{};",
157 count, offset, ret, insert, base);
158 }
159}
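// When count and offset are immediates the field descriptor above folds
// directly into the instruction, e.g. "BFI.S R0,{8,4,0,0},R1,R2;" for an
// 8-bit field at bit 4 (values chosen for illustration); otherwise the two
// MOVs stage count and offset through the RC scratch register first.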
160
161void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
162 ScalarS32 count) {
163 const Register ret{ctx.reg_alloc.Define(inst)};
164 if (count.type != Type::Register && offset.type != Type::Register) {
165 ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
166 } else {
167 ctx.Add("MOV.S RC.x,{};"
168 "MOV.S RC.y,{};"
169 "BFE.S {},RC,{};",
170 count, offset, ret, base);
171 }
172}
173
174void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
175 ScalarU32 count) {
176 const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
177 const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
178 if (zero) {
179 zero->Invalidate();
180 }
181 if (sign) {
182 sign->Invalidate();
183 }
184 if (zero || sign) {
185 ctx.reg_alloc.InvalidateConditionCodes();
186 }
187 const Register ret{ctx.reg_alloc.Define(inst)};
188 if (count.type != Type::Register && offset.type != Type::Register) {
189 ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
190 } else {
191 ctx.Add("MOV.U RC.x,{};"
192 "MOV.U RC.y,{};"
193 "BFE.U {},RC,{};",
194 count, offset, ret, base);
195 }
196 if (zero) {
197 ctx.Add("SEQ.S {},{},0;", *zero, ret);
198 }
199 if (sign) {
200 ctx.Add("SLT.S {},{},0;", *sign, ret);
201 }
202}
203
204void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
205 ctx.Add("BFR {},{};", inst, value);
206}
207
208void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
209 ctx.Add("BTC {},{};", inst, value);
210}
211
212void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
213 ctx.Add("NOT.S {},{};", inst, value);
214}
215
216void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
217 ctx.Add("BTFM.S {},{};", inst, value);
218}
219
220void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
221 ctx.Add("BTFM.U {},{};", inst, value);
222}
223
224void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
225 ctx.Add("MIN.S {},{},{};", inst, a, b);
226}
227
228void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
229 ctx.Add("MIN.U {},{},{};", inst, a, b);
230}
231
232void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
233 ctx.Add("MAX.S {},{},{};", inst, a, b);
234}
235
236void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
237 ctx.Add("MAX.U {},{},{};", inst, a, b);
238}
239
240void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
241 const Register ret{ctx.reg_alloc.Define(inst)};
242 ctx.Add("MIN.S RC.x,{},{};"
243 "MAX.S {}.x,RC.x,{};",
244 max, value, ret, min);
245}
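// Note the operand order above: clamp(value, min, max) is realized as
// max(min(value, max), min), so the first MIN writes min(value, max) into
// RC.x and the MAX then applies the lower bound.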
246
247void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) {
248 const Register ret{ctx.reg_alloc.Define(inst)};
249 ctx.Add("MIN.U RC.x,{},{};"
250 "MAX.U {}.x,RC.x,{};",
251 max, value, ret, min);
252}
253
254void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
255 ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs);
256}
257
258void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
259 ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs);
260}
261
262void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
263 ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs);
264}
265
266void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
267 ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs);
268}
269
270void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
271 ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs);
272}
273
274void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
275 ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs);
276}
277
278void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
279 ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs);
280}
281
282void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
283 ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs);
284}
285
286void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
287 ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs);
288}
289
290void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
291 ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs);
292}
293
294} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
new file mode 100644
index 000000000..af9fac7c1
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -0,0 +1,568 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/runtime_info.h"
12
13namespace Shader::Backend::GLASM {
14namespace {
15void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
16 std::string_view then_expr, std::string_view else_expr = {}) {
17 // Operate on a bindless SSBO, invoking the expression with bounds checking
18 // address = c[binding].xy
19 // length = c[binding].z
20 const u32 sb_binding{binding.U32()};
21 ctx.Add("PK64.U DC,c[{}];" // pointer = address
22 "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
23 "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
24 "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
25 sb_binding, offset, offset, sb_binding);
26 if (else_expr.empty()) {
27 ctx.Add("IF NE.x;{}ENDIF;", then_expr);
28 } else {
29 ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
30 }
31}
32
33void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
34 std::string_view else_expr = {}) {
35 const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
36 for (size_t index = 0; index < num_buffers; ++index) {
37 if (!ctx.info.nvn_buffer_used[index]) {
38 continue;
39 }
40 const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
41 ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
42 "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
43 "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
44 "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
45 "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0
46 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0
47 "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
48 "IF NE.x;" // if cond
49 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
50 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
51 address, address);
52 if (pointer_based) {
53 ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
54 "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
55 "{}"
56 "ELSE;",
57 index, expr);
58 } else {
59 ctx.Add("CVT.U32.U64 RC.x,DC.x;"
60 "{},ssbo{}[RC.x];"
61 "ELSE;",
62 expr, index);
63 }
64 }
65 if (!else_expr.empty()) {
66 ctx.Add("{}", else_expr);
67 }
68 const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()};
69 for (size_t index = 0; index < num_used_buffers; ++index) {
70 ctx.Add("ENDIF;");
71 }
72}
73
74template <typename ValueType>
75void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
76 std::string_view size) {
77 if (ctx.runtime_info.glasm_use_storage_buffers) {
78 ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
79 } else {
80 StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
81 }
82}
83
84void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
85 std::string_view size) {
86 const Register ret{ctx.reg_alloc.Define(inst)};
87 if (ctx.runtime_info.glasm_use_storage_buffers) {
88 ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
89 } else {
90 StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
91 fmt::format("MOV.U {},{{0,0,0,0}};", ret));
92 }
93}
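// As a sketch, a 32-bit storage load that takes the non-SSBO fallback path
// expands roughly to (binding 0, offset in R1.x, destination R0, all
// placeholder names):
//   PK64.U DC,c[0];
//   CVT.U64.U32 DC.z,R1.x;
//   ADD.U64 DC.x,DC.x,DC.z;
//   SLT.U.CC RC.x,R1.x,c[0].z;
//   IF NE.x;LOAD.U32 R0,DC.x;ELSE;MOV.U R0,{0,0,0,0};ENDIF;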
94
95template <typename ValueType>
96void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
97 if (ctx.runtime_info.glasm_use_storage_buffers) {
98 GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
99 } else {
100 GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
101 }
102}
103
104void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
105 const Register ret{ctx.reg_alloc.Define(inst)};
106 if (ctx.runtime_info.glasm_use_storage_buffers) {
107 GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
108 } else {
109 GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
110 fmt::format("MOV.S {},0;", ret));
111 }
112}
113
114template <typename ValueType>
115void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
116 ValueType value, std::string_view operation, std::string_view size) {
117 const Register ret{ctx.reg_alloc.Define(inst)};
118 if (ctx.runtime_info.glasm_use_storage_buffers) {
119 ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
120 offset);
121 } else {
122 StorageOp(ctx, binding, offset,
123 fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
124 }
125}
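// For instance, with native storage buffer support a 32-bit atomic add
// collapses to a single "ATOMB.ADD.U32 ret,value,ssbo0[offset];" (operand
// names illustrative), while the fallback path wraps the equivalent
// "ATOM.ADD.U32 ...,DC.x;" in the same bounds check used by Load and Write.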
126} // Anonymous namespace
127
128void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
129 GlobalLoad(ctx, inst, address, "U8");
130}
131
132void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
133 GlobalLoad(ctx, inst, address, "S8");
134}
135
136void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
137 GlobalLoad(ctx, inst, address, "U16");
138}
139
140void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
141 GlobalLoad(ctx, inst, address, "S16");
142}
143
144void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
145 GlobalLoad(ctx, inst, address, "U32");
146}
147
148void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
149 GlobalLoad(ctx, inst, address, "U32X2");
150}
151
152void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) {
153 GlobalLoad(ctx, inst, address, "U32X4");
154}
155
156void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) {
157 GlobalWrite(ctx, address, value, "U8");
158}
159
160void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) {
161 GlobalWrite(ctx, address, value, "S8");
162}
163
164void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) {
165 GlobalWrite(ctx, address, value, "U16");
166}
167
168void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) {
169 GlobalWrite(ctx, address, value, "S16");
170}
171
172void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) {
173 GlobalWrite(ctx, address, value, "U32");
174}
175
176void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) {
177 GlobalWrite(ctx, address, value, "U32X2");
178}
179
180void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) {
181 GlobalWrite(ctx, address, value, "U32X4");
182}
183
184void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
185 ScalarU32 offset) {
186 Load(ctx, inst, binding, offset, "U8");
187}
188
189void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
190 ScalarU32 offset) {
191 Load(ctx, inst, binding, offset, "S8");
192}
193
194void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
195 ScalarU32 offset) {
196 Load(ctx, inst, binding, offset, "U16");
197}
198
199void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
200 ScalarU32 offset) {
201 Load(ctx, inst, binding, offset, "S16");
202}
203
204void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
205 ScalarU32 offset) {
206 Load(ctx, inst, binding, offset, "U32");
207}
208
209void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
210 ScalarU32 offset) {
211 Load(ctx, inst, binding, offset, "U32X2");
212}
213
214void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
215 ScalarU32 offset) {
216 Load(ctx, inst, binding, offset, "U32X4");
217}
218
219void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
220 ScalarU32 value) {
221 Write(ctx, binding, offset, value, "U8");
222}
223
224void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
225 ScalarS32 value) {
226 Write(ctx, binding, offset, value, "S8");
227}
228
229void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
230 ScalarU32 value) {
231 Write(ctx, binding, offset, value, "U16");
232}
233
234void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
235 ScalarS32 value) {
236 Write(ctx, binding, offset, value, "S16");
237}
238
239void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
240 ScalarU32 value) {
241 Write(ctx, binding, offset, value, "U32");
242}
243
244void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
245 Register value) {
246 Write(ctx, binding, offset, value, "U32X2");
247}
248
249void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
250 Register value) {
251 Write(ctx, binding, offset, value, "U32X4");
252}
253
254void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
255 ScalarU32 value) {
256 ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
257}
258
259void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
260 ScalarS32 value) {
261 ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
262}
263
264void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
265 ScalarU32 value) {
266 ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
267}
268
269void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
270 ScalarS32 value) {
271 ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
272}
273
274void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
275 ScalarU32 value) {
276 ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
277}
278
279void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
280 ScalarU32 value) {
281 ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
282}
283
284void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
285 ScalarU32 value) {
286 ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
287}
288
289void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
290 ScalarU32 value) {
291 ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
292}
293
294void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
295 ScalarU32 value) {
296 ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
297}
298
299void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
300 ScalarU32 value) {
301 ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
302}
303
304void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
305 ScalarU32 value) {
306 ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
307}
308
309void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
310 Register value) {
311 ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
312}
313
314void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
315 ScalarU32 offset, ScalarU32 value) {
316 Atom(ctx, inst, binding, offset, value, "ADD", "U32");
317}
318
319void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
320 ScalarU32 offset, ScalarS32 value) {
321 Atom(ctx, inst, binding, offset, value, "MIN", "S32");
322}
323
324void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
325 ScalarU32 offset, ScalarU32 value) {
326 Atom(ctx, inst, binding, offset, value, "MIN", "U32");
327}
328
329void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
330 ScalarU32 offset, ScalarS32 value) {
331 Atom(ctx, inst, binding, offset, value, "MAX", "S32");
332}
333
334void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
335 ScalarU32 offset, ScalarU32 value) {
336 Atom(ctx, inst, binding, offset, value, "MAX", "U32");
337}
338
339void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
340 ScalarU32 offset, ScalarU32 value) {
341 Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
342}
343
344void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
345 ScalarU32 offset, ScalarU32 value) {
346 Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
347}
348
349void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
350 ScalarU32 offset, ScalarU32 value) {
351 Atom(ctx, inst, binding, offset, value, "AND", "U32");
352}
353
354void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
355 ScalarU32 offset, ScalarU32 value) {
356 Atom(ctx, inst, binding, offset, value, "OR", "U32");
357}
358
359void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
360 ScalarU32 offset, ScalarU32 value) {
361 Atom(ctx, inst, binding, offset, value, "XOR", "U32");
362}
363
364void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
365 ScalarU32 offset, ScalarU32 value) {
366 Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
367}
368
369void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
370 ScalarU32 offset, Register value) {
371 Atom(ctx, inst, binding, offset, value, "ADD", "U64");
372}
373
374void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
375 ScalarU32 offset, Register value) {
376 Atom(ctx, inst, binding, offset, value, "MIN", "S64");
377}
378
379void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
380 ScalarU32 offset, Register value) {
381 Atom(ctx, inst, binding, offset, value, "MIN", "U64");
382}
383
384void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
385 ScalarU32 offset, Register value) {
386 Atom(ctx, inst, binding, offset, value, "MAX", "S64");
387}
388
389void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
390 ScalarU32 offset, Register value) {
391 Atom(ctx, inst, binding, offset, value, "MAX", "U64");
392}
393
394void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
395 ScalarU32 offset, Register value) {
396 Atom(ctx, inst, binding, offset, value, "AND", "U64");
397}
398
399void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
400 ScalarU32 offset, Register value) {
401 Atom(ctx, inst, binding, offset, value, "OR", "U64");
402}
403
404void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
405 ScalarU32 offset, Register value) {
406 Atom(ctx, inst, binding, offset, value, "XOR", "U64");
407}
408
409void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
410 ScalarU32 offset, Register value) {
411 Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
412}
413
414void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
415 ScalarU32 offset, ScalarF32 value) {
416 Atom(ctx, inst, binding, offset, value, "ADD", "F32");
417}
418
419void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
420 ScalarU32 offset, Register value) {
421 Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
422}
423
424void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
425 [[maybe_unused]] const IR::Value& binding,
426 [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
427 throw NotImplementedException("GLASM instruction");
428}
429
430void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
431 ScalarU32 offset, Register value) {
432 Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
433}
434
435void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
436 [[maybe_unused]] const IR::Value& binding,
437 [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
438 throw NotImplementedException("GLASM instruction");
439}
440
441void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
442 ScalarU32 offset, Register value) {
443 Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
444}
445
446void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
447 [[maybe_unused]] const IR::Value& binding,
448 [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
449 throw NotImplementedException("GLASM instruction");
450}
451
452void EmitGlobalAtomicIAdd32(EmitContext&) {
453 throw NotImplementedException("GLASM instruction");
454}
455
456void EmitGlobalAtomicSMin32(EmitContext&) {
457 throw NotImplementedException("GLASM instruction");
458}
459
460void EmitGlobalAtomicUMin32(EmitContext&) {
461 throw NotImplementedException("GLASM instruction");
462}
463
464void EmitGlobalAtomicSMax32(EmitContext&) {
465 throw NotImplementedException("GLASM instruction");
466}
467
468void EmitGlobalAtomicUMax32(EmitContext&) {
469 throw NotImplementedException("GLASM instruction");
470}
471
472void EmitGlobalAtomicInc32(EmitContext&) {
473 throw NotImplementedException("GLASM instruction");
474}
475
476void EmitGlobalAtomicDec32(EmitContext&) {
477 throw NotImplementedException("GLASM instruction");
478}
479
480void EmitGlobalAtomicAnd32(EmitContext&) {
481 throw NotImplementedException("GLASM instruction");
482}
483
484void EmitGlobalAtomicOr32(EmitContext&) {
485 throw NotImplementedException("GLASM instruction");
486}
487
488void EmitGlobalAtomicXor32(EmitContext&) {
489 throw NotImplementedException("GLASM instruction");
490}
491
492void EmitGlobalAtomicExchange32(EmitContext&) {
493 throw NotImplementedException("GLASM instruction");
494}
495
496void EmitGlobalAtomicIAdd64(EmitContext&) {
497 throw NotImplementedException("GLASM instruction");
498}
499
500void EmitGlobalAtomicSMin64(EmitContext&) {
501 throw NotImplementedException("GLASM instruction");
502}
503
504void EmitGlobalAtomicUMin64(EmitContext&) {
505 throw NotImplementedException("GLASM instruction");
506}
507
508void EmitGlobalAtomicSMax64(EmitContext&) {
509 throw NotImplementedException("GLASM instruction");
510}
511
512void EmitGlobalAtomicUMax64(EmitContext&) {
513 throw NotImplementedException("GLASM instruction");
514}
515
516void EmitGlobalAtomicInc64(EmitContext&) {
517 throw NotImplementedException("GLASM instruction");
518}
519
520void EmitGlobalAtomicDec64(EmitContext&) {
521 throw NotImplementedException("GLASM instruction");
522}
523
524void EmitGlobalAtomicAnd64(EmitContext&) {
525 throw NotImplementedException("GLASM instruction");
526}
527
528void EmitGlobalAtomicOr64(EmitContext&) {
529 throw NotImplementedException("GLASM instruction");
530}
531
532void EmitGlobalAtomicXor64(EmitContext&) {
533 throw NotImplementedException("GLASM instruction");
534}
535
536void EmitGlobalAtomicExchange64(EmitContext&) {
537 throw NotImplementedException("GLASM instruction");
538}
539
540void EmitGlobalAtomicAddF32(EmitContext&) {
541 throw NotImplementedException("GLASM instruction");
542}
543
544void EmitGlobalAtomicAddF16x2(EmitContext&) {
545 throw NotImplementedException("GLASM instruction");
546}
547
548void EmitGlobalAtomicAddF32x2(EmitContext&) {
549 throw NotImplementedException("GLASM instruction");
550}
551
552void EmitGlobalAtomicMinF16x2(EmitContext&) {
553 throw NotImplementedException("GLASM instruction");
554}
555
556void EmitGlobalAtomicMinF32x2(EmitContext&) {
557 throw NotImplementedException("GLASM instruction");
558}
559
560void EmitGlobalAtomicMaxF16x2(EmitContext&) {
561 throw NotImplementedException("GLASM instruction");
562}
563
564void EmitGlobalAtomicMaxF32x2(EmitContext&) {
565 throw NotImplementedException("GLASM instruction");
566}
567
568} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
new file mode 100644
index 000000000..ff64c6924
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -0,0 +1,273 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12#ifdef _MSC_VER
13#pragma warning(disable : 4100)
14#endif
15
16namespace Shader::Backend::GLASM {
17
18#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
19
20static void DefinePhi(EmitContext& ctx, IR::Inst& phi) {
21 switch (phi.Arg(0).Type()) {
22 case IR::Type::U1:
23 case IR::Type::U32:
24 case IR::Type::F32:
25 ctx.reg_alloc.Define(phi);
26 break;
27 case IR::Type::U64:
28 case IR::Type::F64:
29 ctx.reg_alloc.LongDefine(phi);
30 break;
31 default:
32 throw NotImplementedException("Phi node type {}", phi.Type());
33 }
34}
35
36void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
37 const size_t num_args{phi.NumArgs()};
38 for (size_t i = 0; i < num_args; ++i) {
39 ctx.reg_alloc.Consume(phi.Arg(i));
40 }
41 if (!phi.Definition<Id>().is_valid) {
42 // The phi node wasn't forward defined
43 DefinePhi(ctx, phi);
44 }
45}
46
47void EmitVoid(EmitContext&) {}
48
49void EmitReference(EmitContext& ctx, const IR::Value& value) {
50 ctx.reg_alloc.Consume(value);
51}
52
53void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
54 IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())};
55 if (!phi.Definition<Id>().is_valid) {
56 // The phi node wasn't forward defined
57 DefinePhi(ctx, phi);
58 }
59 const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})};
60 const Value eval_value{ctx.reg_alloc.Consume(value)};
61
62 if (phi_reg == eval_value) {
63 return;
64 }
65 switch (phi.Flags<IR::Type>()) {
66 case IR::Type::U1:
67 case IR::Type::U32:
68 case IR::Type::F32:
69 ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
70 break;
71 case IR::Type::U64:
72 case IR::Type::F64:
73 ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
74 break;
75 default:
76 throw NotImplementedException("Phi node type {}", phi.Type());
77 }
78}
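// In the common 32-bit case the move above degenerates into a single
// "MOV.S Rphi.x,Rsrc.x;" (names illustrative), and it is skipped entirely
// when the register allocator already aliases the phi and its argument to
// the same register.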
79
80void EmitJoin(EmitContext& ctx) {
81 NotImplemented();
82}
83
84void EmitDemoteToHelperInvocation(EmitContext& ctx) {
85 ctx.Add("KIL TR.x;");
86}
87
88void EmitBarrier(EmitContext& ctx) {
89 ctx.Add("BAR;");
90}
91
92void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
93 ctx.Add("MEMBAR.CTA;");
94}
95
96void EmitDeviceMemoryBarrier(EmitContext& ctx) {
97 ctx.Add("MEMBAR;");
98}
99
100void EmitPrologue(EmitContext& ctx) {
101 // TODO
102}
103
104void EmitEpilogue(EmitContext& ctx) {
105 // TODO
106}
107
108void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
109 if (stream.type == Type::U32 && stream.imm_u32 == 0) {
110 ctx.Add("EMIT;");
111 } else {
112 ctx.Add("EMITS {};", stream);
113 }
114}
115
116void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
117 if (!stream.IsImmediate()) {
118 LOG_WARNING(Shader_GLASM, "Stream is not immediate");
119 }
120 ctx.reg_alloc.Consume(stream);
121 ctx.Add("ENDPRIM;");
122}
123
124void EmitGetRegister(EmitContext& ctx) {
125 NotImplemented();
126}
127
128void EmitSetRegister(EmitContext& ctx) {
129 NotImplemented();
130}
131
132void EmitGetPred(EmitContext& ctx) {
133 NotImplemented();
134}
135
136void EmitSetPred(EmitContext& ctx) {
137 NotImplemented();
138}
139
140void EmitSetGotoVariable(EmitContext& ctx) {
141 NotImplemented();
142}
143
144void EmitGetGotoVariable(EmitContext& ctx) {
145 NotImplemented();
146}
147
148void EmitSetIndirectBranchVariable(EmitContext& ctx) {
149 NotImplemented();
150}
151
152void EmitGetIndirectBranchVariable(EmitContext& ctx) {
153 NotImplemented();
154}
155
156void EmitGetZFlag(EmitContext& ctx) {
157 NotImplemented();
158}
159
160void EmitGetSFlag(EmitContext& ctx) {
161 NotImplemented();
162}
163
164void EmitGetCFlag(EmitContext& ctx) {
165 NotImplemented();
166}
167
168void EmitGetOFlag(EmitContext& ctx) {
169 NotImplemented();
170}
171
172void EmitSetZFlag(EmitContext& ctx) {
173 NotImplemented();
174}
175
176void EmitSetSFlag(EmitContext& ctx) {
177 NotImplemented();
178}
179
180void EmitSetCFlag(EmitContext& ctx) {
181 NotImplemented();
182}
183
184void EmitSetOFlag(EmitContext& ctx) {
185 NotImplemented();
186}
187
188void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
189 ctx.Add("MOV.S {},invocation.groupid;", inst);
190}
191
192void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
193 ctx.Add("MOV.S {},invocation.localid;", inst);
194}
195
196void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
197 ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst);
198}
199
200void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
201 ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst);
202}
203
204void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
205 ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst);
206}
207
208void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
209 ctx.uses_y_direction = true;
210 ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
211}
212
213void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
214 ctx.Add("MOV.S {}.x,0;", inst);
215}
216
217void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
218 ctx.Add("MOV.S {}.x,0;", inst);
219}
220
221void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
222 ctx.Add("MOV.S {}.x,0;", inst);
223}
224
225void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
226 ctx.Add("MOV.S {}.x,0;", inst);
227}
228
229void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
230 ctx.LongAdd("MOV.S64 {}.x,0;", inst);
231}
232
233void EmitGetZeroFromOp(EmitContext& ctx) {
234 NotImplemented();
235}
236
237void EmitGetSignFromOp(EmitContext& ctx) {
238 NotImplemented();
239}
240
241void EmitGetCarryFromOp(EmitContext& ctx) {
242 NotImplemented();
243}
244
245void EmitGetOverflowFromOp(EmitContext& ctx) {
246 NotImplemented();
247}
248
249void EmitGetSparseFromOp(EmitContext& ctx) {
250 NotImplemented();
251}
252
253void EmitGetInBoundsFromOp(EmitContext& ctx) {
254 NotImplemented();
255}
256
257void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
258 ctx.Add("OR.S {},{},{};", inst, a, b);
259}
260
261void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
262 ctx.Add("AND.S {},{},{};", inst, a, b);
263}
264
265void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
266 ctx.Add("XOR.S {},{},{};", inst, a, b);
267}
268
269void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
270 ctx.Add("SEQ.S {},{},0;", inst, value);
271}
272
273} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
new file mode 100644
index 000000000..68fff613c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
@@ -0,0 +1,67 @@
1
2// Copyright 2021 yuzu Emulator Project
3// Licensed under GPLv2 or any later version
4// Refer to the license.txt file included.
5
6#include "shader_recompiler/backend/glasm/emit_context.h"
7#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
8#include "shader_recompiler/frontend/ir/value.h"
9
10namespace Shader::Backend::GLASM {
11
12void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
13 ScalarS32 false_value) {
14 ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
15}
16
17void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
18 [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
19 throw NotImplementedException("GLASM instruction");
20}
21
22void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
23 [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
24 throw NotImplementedException("GLASM instruction");
25}
26
27void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
28 ScalarS32 false_value) {
29 ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
30}
31
32void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
33 Register false_value) {
34 ctx.reg_alloc.InvalidateConditionCodes();
35 const Register ret{ctx.reg_alloc.LongDefine(inst)};
36 if (ret == true_value) {
37 ctx.Add("MOV.S.CC RC.x,{};"
38 "MOV.U64 {}.x(EQ.x),{};",
39 cond, ret, false_value);
40 } else if (ret == false_value) {
41 ctx.Add("MOV.S.CC RC.x,{};"
42 "MOV.U64 {}.x(NE.x),{};",
43 cond, ret, true_value);
44 } else {
45 ctx.Add("MOV.S.CC RC.x,{};"
46 "MOV.U64 {}.x,{};"
47 "MOV.U64 {}.x(NE.x),{};",
48 cond, ret, false_value, ret, true_value);
49 }
50}
51
52void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
53 [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
54 throw NotImplementedException("GLASM instruction");
55}
56
57void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
58 ScalarS32 false_value) {
59 ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
60}
61
62void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
63 [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
64 throw NotImplementedException("GLASM instruction");
65}
66
67} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
new file mode 100644
index 000000000..c1498f449
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
@@ -0,0 +1,58 @@
1
2// Copyright 2021 yuzu Emulator Project
3// Licensed under GPLv2 or any later version
4// Refer to the license.txt file included.
5
6#include "shader_recompiler/backend/glasm/emit_context.h"
7#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
8#include "shader_recompiler/frontend/ir/value.h"
9
10namespace Shader::Backend::GLASM {
11void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
12 ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset);
13}
14
15void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
16 ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset);
17}
18
19void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
20 ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset);
21}
22
23void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
24 ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset);
25}
26
27void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
28 ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset);
29}
30
31void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
32 ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset);
33}
34
35void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
36 ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset);
37}
38
39void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
40 ctx.Add("STS.U8 {},shared_mem[{}];", value, offset);
41}
42
43void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
44 ctx.Add("STS.U16 {},shared_mem[{}];", value, offset);
45}
46
47void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
48 ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
49}
50
51void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
52 ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset);
53}
54
55void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) {
56 ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset);
57}
58} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
new file mode 100644
index 000000000..544d475b4
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -0,0 +1,150 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8#include "shader_recompiler/profile.h"
9
10namespace Shader::Backend::GLASM {
11
12void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
13 ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name);
14}
15
16void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
17 ctx.Add("TGALL.S {}.x,{};", inst, pred);
18}
19
20void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
21 ctx.Add("TGANY.S {}.x,{};", inst, pred);
22}
23
24void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
25 ctx.Add("TGEQ.S {}.x,{};", inst, pred);
26}
27
28void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
29 ctx.Add("TGBALLOT {}.x,{};", inst, pred);
30}
31
32void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
33 ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name);
34}
35
36void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
37 ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name);
38}
39
40void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
41 ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name);
42}
43
44void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
45 ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name);
46}
47
48void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
49 ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name);
50}
51
52static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
53 const IR::Value& clamp, const IR::Value& segmentation_mask,
54 std::string_view op) {
55 IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
56 if (in_bounds) {
57 in_bounds->Invalidate();
58 }
59 std::string mask;
60 if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
61 mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
62 } else {
63 mask = "RC";
64 ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};",
65 ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)},
66 ScalarU32{ctx.reg_alloc.Consume(clamp)});
67 }
68 const Register value_ret{ctx.reg_alloc.Define(inst)};
69 if (in_bounds) {
70 const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
71 ctx.Add("SHF{}.U {},{},{},{};"
72 "MOV.U {}.x,{}.y;",
73 op, bounds_ret, value, index, mask, value_ret, bounds_ret);
74 } else {
75 ctx.Add("SHF{}.U {},{},{},{};"
76 "MOV.U {}.x,{}.y;",
77 op, value_ret, value, index, mask, value_ret, value_ret);
78 }
79}
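// The mask above places the clamp value in the low bits and the segmentation
// mask at bit 8, matching the {5,8,0,0} bit-field insert. For example, the
// full-warp case of clamp=31 with a zero segmentation mask folds to the
// immediate 31, while clamp=31 with a segmentation mask of 0x1c packs to
// 31 | (0x1c << 8) = 0x1c1f.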
80
81void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
82 const IR::Value& clamp, const IR::Value& segmentation_mask) {
83 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX");
84}
85
86void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
87 const IR::Value& clamp, const IR::Value& segmentation_mask) {
88 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP");
89}
90
91void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
92 const IR::Value& clamp, const IR::Value& segmentation_mask) {
93 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN");
94}
95
96void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
97 const IR::Value& clamp, const IR::Value& segmentation_mask) {
98 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
99}
100
101void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
102 ScalarU32 swizzle) {
103 const auto ret{ctx.reg_alloc.Define(inst)};
104 ctx.Add("AND.U RC.z,{}.threadid,3;"
105 "SHL.U RC.z,RC.z,1;"
106 "SHR.U RC.z,{},RC.z;"
107 "AND.U RC.z,RC.z,3;"
108 "MUL.F RC.x,{},FSWZA[RC.z];"
109 "MUL.F RC.y,{},FSWZB[RC.z];"
110 "ADD.F {}.x,RC.x,RC.y;",
111 ctx.stage_name, swizzle, op_a, op_b, ret);
112}
113
114void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
115 if (ctx.profile.support_derivative_control) {
116 ctx.Add("DDX.FINE {}.x,{};", inst, p);
117 } else {
118 LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
119 ctx.Add("DDX {}.x,{};", inst, p);
120 }
121}
122
123void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
124 if (ctx.profile.support_derivative_control) {
125 ctx.Add("DDY.FINE {}.x,{};", inst, p);
126 } else {
127 LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
128 ctx.Add("DDY {}.x,{};", inst, p);
129 }
130}
131
132void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
133 if (ctx.profile.support_derivative_control) {
134 ctx.Add("DDX.COARSE {}.x,{};", inst, p);
135 } else {
136 LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
137 ctx.Add("DDX {}.x,{};", inst, p);
138 }
139}
140
141void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
142 if (ctx.profile.support_derivative_control) {
143 ctx.Add("DDY.COARSE {}.x,{};", inst, p);
144 } else {
145 LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
146 ctx.Add("DDY {}.x,{};", inst, p);
147 }
148}
149
150} // namespace Shader::Backend::GLASM
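(Editorial sketch, not part of the patch above.) The Shuffle helper folds the clamp and segmentation mask into a single immediate operand when both are compile-time constants; otherwise it packs them at runtime with BFI.U into RC.x. A minimal standalone illustration of the immediate path, with example values chosen here rather than taken from the diff:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t clamp = 31;             // example: clamp lane index within a 32-wide group
    const std::uint32_t segmentation_mask = 0;  // example: no segmentation
    // Same packing as the immediate branch of Shuffle(): clamp in the low byte,
    // segmentation mask shifted up by 8.
    const std::uint32_t mask = clamp | (segmentation_mask << 8);
    std::printf("SHF mask immediate: %u\n", static_cast<unsigned>(mask));
    return 0;
}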
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
new file mode 100644
index 000000000..4c046db6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -0,0 +1,186 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "shader_recompiler/backend/glasm/emit_context.h"
10#include "shader_recompiler/backend/glasm/reg_alloc.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::Backend::GLASM {
15
16Register RegAlloc::Define(IR::Inst& inst) {
17 return Define(inst, false);
18}
19
20Register RegAlloc::LongDefine(IR::Inst& inst) {
21 return Define(inst, true);
22}
23
24Value RegAlloc::Peek(const IR::Value& value) {
25 if (value.IsImmediate()) {
26 return MakeImm(value);
27 } else {
28 return PeekInst(*value.Inst());
29 }
30}
31
32Value RegAlloc::Consume(const IR::Value& value) {
33 if (value.IsImmediate()) {
34 return MakeImm(value);
35 } else {
36 return ConsumeInst(*value.Inst());
37 }
38}
39
40void RegAlloc::Unref(IR::Inst& inst) {
41 IR::Inst& value_inst{AliasInst(inst)};
42 value_inst.DestructiveRemoveUsage();
43 if (!value_inst.HasUses()) {
44 Free(value_inst.Definition<Id>());
45 }
46}
47
48Register RegAlloc::AllocReg() {
49 Register ret;
50 ret.type = Type::Register;
51 ret.id = Alloc(false);
52 return ret;
53}
54
55Register RegAlloc::AllocLongReg() {
56 Register ret;
57 ret.type = Type::Register;
58 ret.id = Alloc(true);
59 return ret;
60}
61
62void RegAlloc::FreeReg(Register reg) {
63 Free(reg.id);
64}
65
66Value RegAlloc::MakeImm(const IR::Value& value) {
67 Value ret;
68 switch (value.Type()) {
69 case IR::Type::Void:
70 ret.type = Type::Void;
71 break;
72 case IR::Type::U1:
73 ret.type = Type::U32;
74 ret.imm_u32 = value.U1() ? 0xffffffff : 0;
75 break;
76 case IR::Type::U32:
77 ret.type = Type::U32;
78 ret.imm_u32 = value.U32();
79 break;
80 case IR::Type::F32:
81 ret.type = Type::U32;
82 ret.imm_u32 = Common::BitCast<u32>(value.F32());
83 break;
84 case IR::Type::U64:
85 ret.type = Type::U64;
86 ret.imm_u64 = value.U64();
87 break;
88 case IR::Type::F64:
89 ret.type = Type::U64;
90 ret.imm_u64 = Common::BitCast<u64>(value.F64());
91 break;
92 default:
93 throw NotImplementedException("Immediate type {}", value.Type());
94 }
95 return ret;
96}
97
98Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
99 if (inst.HasUses()) {
100 inst.SetDefinition<Id>(Alloc(is_long));
101 } else {
102 Id id{};
103 id.is_long.Assign(is_long ? 1 : 0);
104 id.is_null.Assign(1);
105 inst.SetDefinition<Id>(id);
106 }
107 return Register{PeekInst(inst)};
108}
109
110Value RegAlloc::PeekInst(IR::Inst& inst) {
111 Value ret;
112 ret.type = Type::Register;
113 ret.id = inst.Definition<Id>();
114 return ret;
115}
116
117Value RegAlloc::ConsumeInst(IR::Inst& inst) {
118 Unref(inst);
119 return PeekInst(inst);
120}
121
122Id RegAlloc::Alloc(bool is_long) {
123 size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
124 std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
125 if (num_used_registers + num_used_long_registers < NUM_REGS) {
126 for (size_t reg = 0; reg < NUM_REGS; ++reg) {
127 if (use[reg]) {
128 continue;
129 }
130 num_regs = std::max(num_regs, reg + 1);
131 use[reg] = true;
132 Id ret{};
133 ret.is_valid.Assign(1);
134 ret.is_long.Assign(is_long ? 1 : 0);
135 ret.is_spill.Assign(0);
136 ret.is_condition_code.Assign(0);
137 ret.is_null.Assign(0);
138 ret.index.Assign(static_cast<u32>(reg));
139 return ret;
140 }
141 }
142 throw NotImplementedException("Register spilling");
143}
144
145void RegAlloc::Free(Id id) {
146 if (id.is_valid == 0) {
147 throw LogicError("Freeing invalid register");
148 }
149 if (id.is_spill != 0) {
150 throw NotImplementedException("Free spill");
151 }
152 if (id.is_long != 0) {
153 long_register_use[id.index] = false;
154 } else {
155 register_use[id.index] = false;
156 }
157}
158
159/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
160 switch (inst.GetOpcode()) {
161 case IR::Opcode::Identity:
162 case IR::Opcode::BitCastU16F16:
163 case IR::Opcode::BitCastU32F32:
164 case IR::Opcode::BitCastU64F64:
165 case IR::Opcode::BitCastF16U16:
166 case IR::Opcode::BitCastF32U32:
167 case IR::Opcode::BitCastF64U64:
168 return true;
169 default:
170 return false;
171 }
172}
173
174/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) {
175 IR::Inst* it{&inst};
176 while (IsAliased(*it)) {
177 const IR::Value arg{it->Arg(0)};
178 if (arg.IsImmediate()) {
179 break;
180 }
181 it = arg.InstRecursive();
182 }
183 return *it;
184}
185
186} // namespace Shader::Backend::GLASM
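(Editorial sketch, not part of the patch above.) RegAlloc::Alloc hands out the lowest free index, tracks a high-water mark rather than a live count, and throws once the combined short and long usage reaches NUM_REGS, since spilling is unimplemented. The same first-fit policy, reduced to a self-contained helper:

#include <algorithm>
#include <bitset>
#include <cstddef>
#include <optional>

constexpr std::size_t NUM_REGS = 4096;

// Returns the lowest free slot, or nullopt when every slot is taken (the point
// at which the real allocator would have to spill).
std::optional<std::size_t> FirstFit(std::bitset<NUM_REGS>& use, std::size_t& high_water) {
    for (std::size_t reg = 0; reg < NUM_REGS; ++reg) {
        if (use[reg]) {
            continue;
        }
        use[reg] = true;
        high_water = std::max(high_water, reg + 1); // what NumUsedRegisters() reports
        return reg;
    }
    return std::nullopt;
}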
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
new file mode 100644
index 000000000..82aec66c6
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -0,0 +1,303 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8
9#include <fmt/format.h>
10
11#include "common/bit_cast.h"
12#include "common/bit_field.h"
13#include "common/common_types.h"
14#include "shader_recompiler/exception.h"
15
16namespace Shader::IR {
17class Inst;
18class Value;
19} // namespace Shader::IR
20
21namespace Shader::Backend::GLASM {
22
23class EmitContext;
24
25enum class Type : u32 {
26 Void,
27 Register,
28 U32,
29 U64,
30};
31
32struct Id {
33 union {
34 u32 raw;
35 BitField<0, 1, u32> is_valid;
36 BitField<1, 1, u32> is_long;
37 BitField<2, 1, u32> is_spill;
38 BitField<3, 1, u32> is_condition_code;
39 BitField<4, 1, u32> is_null;
40 BitField<5, 27, u32> index;
41 };
42
43 bool operator==(Id rhs) const noexcept {
44 return raw == rhs.raw;
45 }
46 bool operator!=(Id rhs) const noexcept {
47 return !operator==(rhs);
48 }
49};
50static_assert(sizeof(Id) == sizeof(u32));
51
52struct Value {
53 Type type;
54 union {
55 Id id;
56 u32 imm_u32;
57 u64 imm_u64;
58 };
59
60 bool operator==(const Value& rhs) const noexcept {
61 if (type != rhs.type) {
62 return false;
63 }
64 switch (type) {
65 case Type::Void:
66 return true;
67 case Type::Register:
68 return id == rhs.id;
69 case Type::U32:
70 return imm_u32 == rhs.imm_u32;
71 case Type::U64:
72 return imm_u64 == rhs.imm_u64;
73 }
74 return false;
75 }
76 bool operator!=(const Value& rhs) const noexcept {
77 return !operator==(rhs);
78 }
79};
80struct Register : Value {};
81struct ScalarRegister : Value {};
82struct ScalarU32 : Value {};
83struct ScalarS32 : Value {};
84struct ScalarF32 : Value {};
85struct ScalarF64 : Value {};
86
87class RegAlloc {
88public:
89 RegAlloc() = default;
90
91 Register Define(IR::Inst& inst);
92
93 Register LongDefine(IR::Inst& inst);
94
95 [[nodiscard]] Value Peek(const IR::Value& value);
96
97 Value Consume(const IR::Value& value);
98
99 void Unref(IR::Inst& inst);
100
101 [[nodiscard]] Register AllocReg();
102
103 [[nodiscard]] Register AllocLongReg();
104
105 void FreeReg(Register reg);
106
107 void InvalidateConditionCodes() {
108 // This does nothing for now
109 }
110
111 [[nodiscard]] size_t NumUsedRegisters() const noexcept {
112 return num_used_registers;
113 }
114
115 [[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
116 return num_used_long_registers;
117 }
118
119 [[nodiscard]] bool IsEmpty() const noexcept {
120 return register_use.none() && long_register_use.none();
121 }
122
123 /// Returns true if the instruction is expected to be aliased to another
124 static bool IsAliased(const IR::Inst& inst);
125
126 /// Returns the underlying value out of an alias sequence
127 static IR::Inst& AliasInst(IR::Inst& inst);
128
129private:
130 static constexpr size_t NUM_REGS = 4096;
131 static constexpr size_t NUM_ELEMENTS = 4;
132
133 Value MakeImm(const IR::Value& value);
134
135 Register Define(IR::Inst& inst, bool is_long);
136
137 Value PeekInst(IR::Inst& inst);
138
139 Value ConsumeInst(IR::Inst& inst);
140
141 Id Alloc(bool is_long);
142
143 void Free(Id id);
144
145 size_t num_used_registers{};
146 size_t num_used_long_registers{};
147 std::bitset<NUM_REGS> register_use{};
148 std::bitset<NUM_REGS> long_register_use{};
149};
150
151template <bool scalar, typename FormatContext>
152auto FormatTo(FormatContext& ctx, Id id) {
153 if (id.is_condition_code != 0) {
154 throw NotImplementedException("Condition code emission");
155 }
156 if (id.is_spill != 0) {
157 throw NotImplementedException("Spill emission");
158 }
159 if constexpr (scalar) {
160 if (id.is_null != 0) {
161 return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
162 }
163 if (id.is_long != 0) {
164 return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
165 } else {
166 return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
167 }
168 } else {
169 if (id.is_null != 0) {
170 return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
171 }
172 if (id.is_long != 0) {
173 return fmt::format_to(ctx.out(), "D{}", id.index.Value());
174 } else {
175 return fmt::format_to(ctx.out(), "R{}", id.index.Value());
176 }
177 }
178}
179
180} // namespace Shader::Backend::GLASM
181
182template <>
183struct fmt::formatter<Shader::Backend::GLASM::Id> {
184 constexpr auto parse(format_parse_context& ctx) {
185 return ctx.begin();
186 }
187 template <typename FormatContext>
188 auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) {
189 return Shader::Backend::GLASM::FormatTo<true>(ctx, id);
190 }
191};
192
193template <>
194struct fmt::formatter<Shader::Backend::GLASM::Register> {
195 constexpr auto parse(format_parse_context& ctx) {
196 return ctx.begin();
197 }
198 template <typename FormatContext>
199 auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) {
200 if (value.type != Shader::Backend::GLASM::Type::Register) {
201 throw Shader::InvalidArgument("Register value type is not register");
202 }
203 return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id);
204 }
205};
206
207template <>
208struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> {
209 constexpr auto parse(format_parse_context& ctx) {
210 return ctx.begin();
211 }
212 template <typename FormatContext>
213 auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) {
214 if (value.type != Shader::Backend::GLASM::Type::Register) {
215 throw Shader::InvalidArgument("Register value type is not register");
216 }
217 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
218 }
219};
220
221template <>
222struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
223 constexpr auto parse(format_parse_context& ctx) {
224 return ctx.begin();
225 }
226 template <typename FormatContext>
227 auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) {
228 switch (value.type) {
229 case Shader::Backend::GLASM::Type::Void:
230 break;
231 case Shader::Backend::GLASM::Type::Register:
232 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
233 case Shader::Backend::GLASM::Type::U32:
234 return fmt::format_to(ctx.out(), "{}", value.imm_u32);
235 case Shader::Backend::GLASM::Type::U64:
236 break;
237 }
238 throw Shader::InvalidArgument("Invalid value type {}", value.type);
239 }
240};
241
242template <>
243struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
244 constexpr auto parse(format_parse_context& ctx) {
245 return ctx.begin();
246 }
247 template <typename FormatContext>
248 auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) {
249 switch (value.type) {
250 case Shader::Backend::GLASM::Type::Void:
251 break;
252 case Shader::Backend::GLASM::Type::Register:
253 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
254 case Shader::Backend::GLASM::Type::U32:
255 return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32));
256 case Shader::Backend::GLASM::Type::U64:
257 break;
258 }
259 throw Shader::InvalidArgument("Invalid value type {}", value.type);
260 }
261};
262
263template <>
264struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
265 constexpr auto parse(format_parse_context& ctx) {
266 return ctx.begin();
267 }
268 template <typename FormatContext>
269 auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) {
270 switch (value.type) {
271 case Shader::Backend::GLASM::Type::Void:
272 break;
273 case Shader::Backend::GLASM::Type::Register:
274 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
275 case Shader::Backend::GLASM::Type::U32:
276 return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
277 case Shader::Backend::GLASM::Type::U64:
278 break;
279 }
280 throw Shader::InvalidArgument("Invalid value type {}", value.type);
281 }
282};
283
284template <>
285struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
286 constexpr auto parse(format_parse_context& ctx) {
287 return ctx.begin();
288 }
289 template <typename FormatContext>
290 auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
291 switch (value.type) {
292 case Shader::Backend::GLASM::Type::Void:
293 break;
294 case Shader::Backend::GLASM::Type::Register:
295 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
296 case Shader::Backend::GLASM::Type::U32:
297 break;
298 case Shader::Backend::GLASM::Type::U64:
299 return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
300 }
301 throw Shader::InvalidArgument("Invalid value type {}", value.type);
302 }
303};
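(Editorial sketch, not part of the patch above.) The fmt::formatter specializations all funnel into FormatTo, which maps an Id to a GLASM register name: long registers (used for 64-bit values) print as D&lt;n&gt;, 32-bit ones as R&lt;n&gt;, null ids fall back to the RC/DC scratch registers, and the scalar variants append ".x". The naming rule with the BitField and fmt machinery stripped out:

#include <string>

std::string RegisterName(unsigned index, bool is_long, bool is_null, bool scalar) {
    std::string name = is_null ? std::string{is_long ? "DC" : "RC"}
                               : (is_long ? "D" : "R") + std::to_string(index);
    if (scalar) {
        name += ".x"; // ScalarU32/ScalarS32/ScalarF32 always address the x component
    }
    return name;
}
// RegisterName(5, false, false, true)  == "R5.x"
// RegisterName(3, true,  false, false) == "D3"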
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
new file mode 100644
index 000000000..4e6f2c0fe
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -0,0 +1,715 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/bindings.h"
6#include "shader_recompiler/backend/glsl/emit_context.h"
7#include "shader_recompiler/frontend/ir/program.h"
8#include "shader_recompiler/profile.h"
9#include "shader_recompiler/runtime_info.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13u32 CbufIndex(size_t offset) {
14 return (offset / 4) % 4;
15}
16
17char Swizzle(size_t offset) {
18 return "xyzw"[CbufIndex(offset)];
19}
20
21std::string_view InterpDecorator(Interpolation interp) {
22 switch (interp) {
23 case Interpolation::Smooth:
24 return "";
25 case Interpolation::Flat:
26 return "flat ";
27 case Interpolation::NoPerspective:
28 return "noperspective ";
29 }
30 throw InvalidArgument("Invalid interpolation {}", interp);
31}
32
33std::string_view InputArrayDecorator(Stage stage) {
34 switch (stage) {
35 case Stage::Geometry:
36 case Stage::TessellationControl:
37 case Stage::TessellationEval:
38 return "[]";
39 default:
40 return "";
41 }
42}
43
44bool StoresPerVertexAttributes(Stage stage) {
45 switch (stage) {
46 case Stage::VertexA:
47 case Stage::VertexB:
48 case Stage::Geometry:
49 case Stage::TessellationEval:
50 return true;
51 default:
52 return false;
53 }
54}
55
56std::string OutputDecorator(Stage stage, u32 size) {
57 switch (stage) {
58 case Stage::TessellationControl:
59 return fmt::format("[{}]", size);
60 default:
61 return "";
62 }
63}
64
65std::string_view SamplerType(TextureType type, bool is_depth) {
66 if (is_depth) {
67 switch (type) {
68 case TextureType::Color1D:
69 return "sampler1DShadow";
70 case TextureType::ColorArray1D:
71 return "sampler1DArrayShadow";
72 case TextureType::Color2D:
73 return "sampler2DShadow";
74 case TextureType::ColorArray2D:
75 return "sampler2DArrayShadow";
76 case TextureType::ColorCube:
77 return "samplerCubeShadow";
78 case TextureType::ColorArrayCube:
79 return "samplerCubeArrayShadow";
80 default:
81 throw NotImplementedException("Texture type: {}", type);
82 }
83 }
84 switch (type) {
85 case TextureType::Color1D:
86 return "sampler1D";
87 case TextureType::ColorArray1D:
88 return "sampler1DArray";
89 case TextureType::Color2D:
90 return "sampler2D";
91 case TextureType::ColorArray2D:
92 return "sampler2DArray";
93 case TextureType::Color3D:
94 return "sampler3D";
95 case TextureType::ColorCube:
96 return "samplerCube";
97 case TextureType::ColorArrayCube:
98 return "samplerCubeArray";
99 case TextureType::Buffer:
100 return "samplerBuffer";
101 default:
102 throw NotImplementedException("Texture type: {}", type);
103 }
104}
105
106std::string_view ImageType(TextureType type) {
107 switch (type) {
108 case TextureType::Color1D:
109 return "uimage1D";
110 case TextureType::ColorArray1D:
111 return "uimage1DArray";
112 case TextureType::Color2D:
113 return "uimage2D";
114 case TextureType::ColorArray2D:
115 return "uimage2DArray";
116 case TextureType::Color3D:
117 return "uimage3D";
118 case TextureType::ColorCube:
119 return "uimageCube";
120 case TextureType::ColorArrayCube:
121 return "uimageCubeArray";
122 case TextureType::Buffer:
123 return "uimageBuffer";
124 default:
125 throw NotImplementedException("Image type: {}", type);
126 }
127}
128
129std::string_view ImageFormatString(ImageFormat format) {
130 switch (format) {
131 case ImageFormat::Typeless:
132 return "";
133 case ImageFormat::R8_UINT:
134 return ",r8ui";
135 case ImageFormat::R8_SINT:
136 return ",r8i";
137 case ImageFormat::R16_UINT:
138 return ",r16ui";
139 case ImageFormat::R16_SINT:
140 return ",r16i";
141 case ImageFormat::R32_UINT:
142 return ",r32ui";
143 case ImageFormat::R32G32_UINT:
144 return ",rg32ui";
145 case ImageFormat::R32G32B32A32_UINT:
146 return ",rgba32ui";
147 default:
148 throw NotImplementedException("Image format: {}", format);
149 }
150}
151
152std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
153 if (is_written && !is_read) {
154 return "writeonly ";
155 }
156 if (is_read && !is_written) {
157 return "readonly ";
158 }
159 return "";
160}
161
162std::string_view GetTessMode(TessPrimitive primitive) {
163 switch (primitive) {
164 case TessPrimitive::Triangles:
165 return "triangles";
166 case TessPrimitive::Quads:
167 return "quads";
168 case TessPrimitive::Isolines:
169 return "isolines";
170 }
171 throw InvalidArgument("Invalid tessellation primitive {}", primitive);
172}
173
174std::string_view GetTessSpacing(TessSpacing spacing) {
175 switch (spacing) {
176 case TessSpacing::Equal:
177 return "equal_spacing";
178 case TessSpacing::FractionalOdd:
179 return "fractional_odd_spacing";
180 case TessSpacing::FractionalEven:
181 return "fractional_even_spacing";
182 }
183 throw InvalidArgument("Invalid tessellation spacing {}", spacing);
184}
185
186std::string_view InputPrimitive(InputTopology topology) {
187 switch (topology) {
188 case InputTopology::Points:
189 return "points";
190 case InputTopology::Lines:
191 return "lines";
192 case InputTopology::LinesAdjacency:
193 return "lines_adjacency";
194 case InputTopology::Triangles:
195 return "triangles";
196 case InputTopology::TrianglesAdjacency:
197 return "triangles_adjacency";
198 }
199 throw InvalidArgument("Invalid input topology {}", topology);
200}
201
202std::string_view OutputPrimitive(OutputTopology topology) {
203 switch (topology) {
204 case OutputTopology::PointList:
205 return "points";
206 case OutputTopology::LineStrip:
207 return "line_strip";
208 case OutputTopology::TriangleStrip:
209 return "triangle_strip";
210 }
211 throw InvalidArgument("Invalid output topology {}", topology);
212}
213
214void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) {
215 if (!ctx.info.stores.Legacy()) {
216 return;
217 }
218 if (ctx.info.stores.FixedFunctionTexture()) {
219 header += "vec4 gl_TexCoord[8];";
220 }
221 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
222 header += "vec4 gl_FrontColor;";
223 }
224 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) {
225 header += "vec4 gl_FrontSecondaryColor;";
226 }
227 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) {
228 header += "vec4 gl_BackColor;";
229 }
230 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) {
231 header += "vec4 gl_BackSecondaryColor;";
232 }
233}
234
235void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
236 if (!StoresPerVertexAttributes(ctx.stage)) {
237 return;
238 }
239 if (ctx.uses_geometry_passthrough) {
240 return;
241 }
242 header += "out gl_PerVertex{vec4 gl_Position;";
243 if (ctx.info.stores[IR::Attribute::PointSize]) {
244 header += "float gl_PointSize;";
245 }
246 if (ctx.info.stores.ClipDistances()) {
247 header += "float gl_ClipDistance[];";
248 }
249 if (ctx.info.stores[IR::Attribute::ViewportIndex] &&
250 ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
251 header += "int gl_ViewportIndex;";
252 }
253 SetupLegacyOutPerVertex(ctx, header);
254 header += "};";
255 if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) {
256 header += "out int gl_ViewportIndex;";
257 }
258}
259
260void SetupInPerVertex(EmitContext& ctx, std::string& header) {
261 // Currently only required for TessellationControl to adhere to
262 // ARB_separate_shader_objects requirements
263 if (ctx.stage != Stage::TessellationControl) {
264 return;
265 }
266 const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)};
267 const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]};
268 const bool loads_clip_distance{ctx.info.loads.ClipDistances()};
269 const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance};
270 if (!loads_per_vertex) {
271 return;
272 }
273 header += "in gl_PerVertex{";
274 if (loads_position) {
275 header += "vec4 gl_Position;";
276 }
277 if (loads_point_size) {
278 header += "float gl_PointSize;";
279 }
280 if (loads_clip_distance) {
281 header += "float gl_ClipDistance[];";
282 }
283 header += "}gl_in[gl_MaxPatchVertices];";
284}
285
286void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
287 if (!ctx.info.loads.Legacy()) {
288 return;
289 }
290 header += "in gl_PerFragment{";
291 if (ctx.info.loads.FixedFunctionTexture()) {
292 header += "vec4 gl_TexCoord[8];";
293 }
294 if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
295 header += "vec4 gl_Color;";
296 }
297 header += "};";
298}
299
300} // Anonymous namespace
301
302EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
303 const RuntimeInfo& runtime_info_)
304 : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage},
305 uses_geometry_passthrough{program.is_geometry_passthrough &&
306 profile.support_geometry_shader_passthrough} {
307 if (profile.need_fastmath_off) {
308 header += "#pragma optionNV(fastmath off)\n";
309 }
310 SetupExtensions();
311 switch (program.stage) {
312 case Stage::VertexA:
313 case Stage::VertexB:
314 stage_name = "vs";
315 break;
316 case Stage::TessellationControl:
317 stage_name = "tcs";
318 header += fmt::format("layout(vertices={})out;", program.invocations);
319 break;
320 case Stage::TessellationEval:
321 stage_name = "tes";
322 header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
323 GetTessSpacing(runtime_info.tess_spacing),
324 runtime_info.tess_clockwise ? "cw" : "ccw");
325 break;
326 case Stage::Geometry:
327 stage_name = "gs";
328 header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology));
329 if (uses_geometry_passthrough) {
330 header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};";
331 break;
332 } else if (program.is_geometry_passthrough &&
333 !profile.support_geometry_shader_passthrough) {
334 LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported");
335 }
336 header += fmt::format(
337 "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];",
338 OutputPrimitive(program.output_topology), program.output_vertices);
339 break;
340 case Stage::Fragment:
341 stage_name = "fs";
342 position_name = "gl_FragCoord";
343 if (runtime_info.force_early_z) {
344 header += "layout(early_fragment_tests)in;";
345 }
346 if (info.uses_sample_id) {
347 header += "in int gl_SampleID;";
348 }
349 if (info.stores_sample_mask) {
350 header += "out int gl_SampleMask[];";
351 }
352 break;
353 case Stage::Compute:
354 stage_name = "cs";
355 const u32 local_x{std::max(program.workgroup_size[0], 1u)};
356 const u32 local_y{std::max(program.workgroup_size[1], 1u)};
357 const u32 local_z{std::max(program.workgroup_size[2], 1u)};
358 header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;",
359 local_x, local_y, local_z);
360 break;
361 }
362 SetupOutPerVertex(*this, header);
363 SetupInPerVertex(*this, header);
364 SetupLegacyInPerFragment(*this, header);
365
366 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
367 if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) {
368 continue;
369 }
370 const auto qualifier{uses_geometry_passthrough ? "passthrough"
371 : fmt::format("location={}", index)};
372 header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier,
373 InterpDecorator(info.interpolation[index]), index,
374 InputArrayDecorator(stage));
375 }
376 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
377 if (!info.uses_patches[index]) {
378 continue;
379 }
380 const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"};
381 header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index);
382 }
383 if (stage == Stage::Fragment) {
384 for (size_t index = 0; index < info.stores_frag_color.size(); ++index) {
385 if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
386 continue;
387 }
388 header += fmt::format("layout(location={})out vec4 frag_color{};", index, index);
389 }
390 }
391 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
392 if (info.stores.Generic(index)) {
393 DefineGenericOutput(index, program.invocations);
394 }
395 }
396 DefineConstantBuffers(bindings);
397 DefineStorageBuffers(bindings);
398 SetupImages(bindings);
399 SetupTextures(bindings);
400 DefineHelperFunctions();
401 DefineConstants();
402}
403
404void EmitContext::SetupExtensions() {
405 header += "#extension GL_ARB_separate_shader_objects : enable\n";
406 if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
407 header += "#extension GL_EXT_texture_shadow_lod : enable\n";
408 }
409 if (info.uses_int64 && profile.support_int64) {
410 header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
411 }
412 if (info.uses_int64_bit_atomics) {
413 header += "#extension GL_NV_shader_atomic_int64 : enable\n";
414 }
415 if (info.uses_atomic_f32_add) {
416 header += "#extension GL_NV_shader_atomic_float : enable\n";
417 }
418 if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
419 header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
420 }
421 if (info.uses_fp16) {
422 if (profile.support_gl_nv_gpu_shader_5) {
423 header += "#extension GL_NV_gpu_shader5 : enable\n";
424 }
425 if (profile.support_gl_amd_gpu_shader_half_float) {
426 header += "#extension GL_AMD_gpu_shader_half_float : enable\n";
427 }
428 }
429 if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
430 info.uses_subgroup_shuffles || info.uses_fswzadd) {
431 header += "#extension GL_ARB_shader_ballot : enable\n"
432 "#extension GL_ARB_shader_group_vote : enable\n";
433 if (!info.uses_int64 && profile.support_int64) {
434 header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
435 }
436 if (profile.support_gl_warp_intrinsics) {
437 header += "#extension GL_NV_shader_thread_shuffle : enable\n";
438 }
439 }
440 if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) &&
441 profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) {
442 header += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
443 }
444 if (info.uses_sparse_residency && profile.support_gl_sparse_textures) {
445 header += "#extension GL_ARB_sparse_texture2 : enable\n";
446 }
447 if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
448 header += "#extension GL_NV_viewport_array2 : enable\n";
449 }
450 if (info.uses_typeless_image_reads) {
451 header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
452 }
453 if (info.uses_derivatives && profile.support_gl_derivative_control) {
454 header += "#extension GL_ARB_derivative_control : enable\n";
455 }
456 if (uses_geometry_passthrough) {
457 header += "#extension GL_NV_geometry_shader_passthrough : enable\n";
458 }
459}
460
461void EmitContext::DefineConstantBuffers(Bindings& bindings) {
462 if (info.constant_buffer_descriptors.empty()) {
463 return;
464 }
465 for (const auto& desc : info.constant_buffer_descriptors) {
466 header += fmt::format(
467 "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
468 bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
469 bindings.uniform_buffer += desc.count;
470 }
471}
472
473void EmitContext::DefineStorageBuffers(Bindings& bindings) {
474 if (info.storage_buffers_descriptors.empty()) {
475 return;
476 }
477 u32 index{};
478 for (const auto& desc : info.storage_buffers_descriptors) {
479 header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};",
480 bindings.storage_buffer, stage_name, bindings.storage_buffer,
481 stage_name, index);
482 bindings.storage_buffer += desc.count;
483 index += desc.count;
484 }
485}
486
487void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
488 static constexpr std::string_view swizzle{"xyzw"};
489 const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
490 u32 element{0};
491 while (element < 4) {
492 std::string definition{fmt::format("layout(location={}", index)};
493 const u32 remainder{4 - element};
494 const TransformFeedbackVarying* xfb_varying{};
495 if (!runtime_info.xfb_varyings.empty()) {
496 xfb_varying = &runtime_info.xfb_varyings[base_index + element];
497 xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
498 }
499 const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
500 if (element > 0) {
501 definition += fmt::format(",component={}", element);
502 }
503 if (xfb_varying) {
504 definition +=
505 fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
506 xfb_varying->stride, xfb_varying->offset);
507 }
508 std::string name{fmt::format("out_attr{}", index)};
509 if (num_components < 4 || element > 0) {
510 name += fmt::format("_{}", swizzle.substr(element, num_components));
511 }
512 const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
513 definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
514 header += definition;
515
516 const GenericElementInfo element_info{
517 .name = name,
518 .first_element = element,
519 .num_components = num_components,
520 };
521 std::fill_n(output_generics[index].begin() + element, num_components, element_info);
522 element += num_components;
523 }
524}
525
526void EmitContext::DefineHelperFunctions() {
527 header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
528 "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
529 if (info.uses_global_increment || info.uses_shared_increment) {
530 header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
531 }
532 if (info.uses_global_decrement || info.uses_shared_decrement) {
533 header += "uint CasDecrement(uint op_a,uint op_b){"
534 "return op_a==0||op_a>op_b?op_b:(op_a-1u);}";
535 }
536 if (info.uses_atomic_f32_add) {
537 header += "uint CasFloatAdd(uint op_a,float op_b){"
538 "return ftou(utof(op_a)+op_b);}";
539 }
540 if (info.uses_atomic_f32x2_add) {
541 header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
542 "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}";
543 }
544 if (info.uses_atomic_f32x2_min) {
545 header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return "
546 "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}";
547 }
548 if (info.uses_atomic_f32x2_max) {
549 header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return "
550 "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}";
551 }
552 if (info.uses_atomic_f16x2_add) {
553 header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return "
554 "packFloat2x16(unpackFloat2x16(op_a)+op_b);}";
555 }
556 if (info.uses_atomic_f16x2_min) {
557 header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return "
558 "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}";
559 }
560 if (info.uses_atomic_f16x2_max) {
561 header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return "
562 "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}";
563 }
564 if (info.uses_atomic_s32_min) {
565 header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
566 }
567 if (info.uses_atomic_s32_max) {
568 header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
569 }
570 if (info.uses_global_memory && profile.support_int64) {
571 header += DefineGlobalMemoryFunctions();
572 }
573 if (info.loads_indexed_attributes) {
574 const bool is_array{stage == Stage::Geometry};
575 const auto vertex_arg{is_array ? ",uint vertex" : ""};
576 std::string func{
577 fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint "
578 "masked_index=uint(base_index)&3u;switch(base_index>>2){{",
579 vertex_arg)};
580 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
581 const auto position_idx{is_array ? "gl_in[vertex]." : ""};
582 func += fmt::format("case {}:return {}{}[masked_index];",
583 static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx,
584 position_name);
585 }
586 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
587 for (u32 index = 0; index < IR::NUM_GENERICS; ++index) {
588 if (!info.loads.Generic(index)) {
589 continue;
590 }
591 const auto vertex_idx{is_array ? "[vertex]" : ""};
592 func += fmt::format("case {}:return in_attr{}{}[masked_index];",
593 base_attribute_value + index, index, vertex_idx);
594 }
595 func += "default: return 0.0;}}";
596 header += func;
597 }
598 if (info.stores_indexed_attributes) {
599 // TODO
600 }
601}
602
603std::string EmitContext::DefineGlobalMemoryFunctions() {
604 const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) {
605 const auto& ssbo{info.storage_buffers_descriptors[index]};
606 const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
607 const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
608 const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)};
609 std::array<std::string, 2> addr_xy;
610 std::array<std::string, 2> size_xy;
611 for (size_t i = 0; i < addr_xy.size(); ++i) {
612 const auto addr_loc{ssbo.cbuf_offset + 4 * i};
613 const auto size_loc{size_cbuf_offset + 4 * i};
614 addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
615 size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
616 }
617 const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
618 const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
619 func += addr_statment;
620
621 const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
622 const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
623 const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)};
624 const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)};
625 func += comparison;
626
627 const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
628 func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr);
629 }};
630 std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"};
631 std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"};
632 std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"};
633 std::string load_func{"uint LoadGlobal32(uint64_t addr){"};
634 std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"};
635 std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"};
636 const size_t num_buffers{info.storage_buffers_descriptors.size()};
637 for (size_t index = 0; index < num_buffers; ++index) {
638 if (!info.nvn_buffer_used[index]) {
639 continue;
640 }
641 define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}");
642 define_body(write_func_64, index,
643 "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}");
644 define_body(write_func_128, index,
645 "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint("
646 "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}");
647 define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}");
648 define_body(load_func_64, index,
649 "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}");
650 define_body(load_func_128, index,
651 "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
652 "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
653 }
654 write_func += '}';
655 write_func_64 += '}';
656 write_func_128 += '}';
657 load_func += "return 0u;}";
658 load_func_64 += "return uvec2(0);}";
659 load_func_128 += "return uvec4(0);}";
660 return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128;
661}
662
663void EmitContext::SetupImages(Bindings& bindings) {
664 image_buffers.reserve(info.image_buffer_descriptors.size());
665 for (const auto& desc : info.image_buffer_descriptors) {
666 image_buffers.push_back({bindings.image, desc.count});
667 const auto format{ImageFormatString(desc.format)};
668 const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
669 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
670 header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
671 bindings.image, format, qualifier, bindings.image, array_decorator);
672 bindings.image += desc.count;
673 }
674 images.reserve(info.image_descriptors.size());
675 for (const auto& desc : info.image_descriptors) {
676 images.push_back({bindings.image, desc.count});
677 const auto format{ImageFormatString(desc.format)};
678 const auto image_type{ImageType(desc.type)};
679 const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
680 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
681 header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
682 qualifier, image_type, bindings.image, array_decorator);
683 bindings.image += desc.count;
684 }
685}
686
687void EmitContext::SetupTextures(Bindings& bindings) {
688 texture_buffers.reserve(info.texture_buffer_descriptors.size());
689 for (const auto& desc : info.texture_buffer_descriptors) {
690 texture_buffers.push_back({bindings.texture, desc.count});
691 const auto sampler_type{SamplerType(TextureType::Buffer, false)};
692 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
693 header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
694 sampler_type, bindings.texture, array_decorator);
695 bindings.texture += desc.count;
696 }
697 textures.reserve(info.texture_descriptors.size());
698 for (const auto& desc : info.texture_descriptors) {
699 textures.push_back({bindings.texture, desc.count});
700 const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
701 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
702 header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
703 sampler_type, bindings.texture, array_decorator);
704 bindings.texture += desc.count;
705 }
706}
707
708void EmitContext::DefineConstants() {
709 if (info.uses_fswzadd) {
710 header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);"
711 "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);";
712 }
713}
714
715} // namespace Shader::Backend::GLSL
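(Editorial sketch, not part of the patch above.) DefineConstantBuffers declares every constant buffer as a fixed array of 4 * 1024 vec4s behind a stage-prefixed name. A minimal reproduction of the string it builds for a hypothetical vertex-stage descriptor at binding 0, index 0 (values invented for illustration):

#include <fmt/format.h>
#include <string>

int main() {
    const std::string decl = fmt::format(
        "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
        0, "vs", 0, "vs", 0, 4 * 1024);
    fmt::print("{}\n", decl);
    // layout(std140,binding=0) uniform vs_cbuf_0{vec4 vs_cbuf0[4096];};
    return 0;
}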
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
new file mode 100644
index 000000000..d9b639d29
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -0,0 +1,174 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <utility>
9#include <vector>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/backend/glsl/var_alloc.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader {
17struct Info;
18struct Profile;
19struct RuntimeInfo;
20} // namespace Shader
21
22namespace Shader::Backend {
23struct Bindings;
24}
25
26namespace Shader::IR {
27class Inst;
28struct Program;
29} // namespace Shader::IR
30
31namespace Shader::Backend::GLSL {
32
33struct GenericElementInfo {
34 std::string name;
35 u32 first_element{};
36 u32 num_components{};
37};
38
39struct TextureImageDefinition {
40 u32 binding;
41 u32 count;
42};
43
44class EmitContext {
45public:
46 explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
47 const RuntimeInfo& runtime_info_);
48
49 template <GlslVarType type, typename... Args>
50 void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
51 const auto var_def{var_alloc.AddDefine(inst, type)};
52 if (var_def.empty()) {
53            // Skip the assignment.
54 code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...);
55 } else {
56 code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...);
57 }
58 // TODO: Remove this
59 code += '\n';
60 }
61
62 template <typename... Args>
63 void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) {
64 Add<GlslVarType::U1>(format_str, inst, args...);
65 }
66
67 template <typename... Args>
68 void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) {
69 Add<GlslVarType::F16x2>(format_str, inst, args...);
70 }
71
72 template <typename... Args>
73 void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) {
74 Add<GlslVarType::U32>(format_str, inst, args...);
75 }
76
77 template <typename... Args>
78 void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) {
79 Add<GlslVarType::F32>(format_str, inst, args...);
80 }
81
82 template <typename... Args>
83 void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) {
84 Add<GlslVarType::U64>(format_str, inst, args...);
85 }
86
87 template <typename... Args>
88 void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) {
89 Add<GlslVarType::F64>(format_str, inst, args...);
90 }
91
92 template <typename... Args>
93 void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
94 Add<GlslVarType::U32x2>(format_str, inst, args...);
95 }
96
97 template <typename... Args>
98 void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
99 Add<GlslVarType::F32x2>(format_str, inst, args...);
100 }
101
102 template <typename... Args>
103 void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
104 Add<GlslVarType::U32x3>(format_str, inst, args...);
105 }
106
107 template <typename... Args>
108 void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
109 Add<GlslVarType::F32x3>(format_str, inst, args...);
110 }
111
112 template <typename... Args>
113 void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
114 Add<GlslVarType::U32x4>(format_str, inst, args...);
115 }
116
117 template <typename... Args>
118 void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
119 Add<GlslVarType::F32x4>(format_str, inst, args...);
120 }
121
122 template <typename... Args>
123 void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) {
124 Add<GlslVarType::PrecF32>(format_str, inst, args...);
125 }
126
127 template <typename... Args>
128 void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) {
129 Add<GlslVarType::PrecF64>(format_str, inst, args...);
130 }
131
132 template <typename... Args>
133 void Add(const char* format_str, Args&&... args) {
134 code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
135 // TODO: Remove this
136 code += '\n';
137 }
138
139 std::string header;
140 std::string code;
141 VarAlloc var_alloc;
142 const Info& info;
143 const Profile& profile;
144 const RuntimeInfo& runtime_info;
145
146 Stage stage{};
147 std::string_view stage_name = "invalid";
148 std::string_view position_name = "gl_Position";
149
150 std::vector<TextureImageDefinition> texture_buffers;
151 std::vector<TextureImageDefinition> image_buffers;
152 std::vector<TextureImageDefinition> textures;
153 std::vector<TextureImageDefinition> images;
154 std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
155
156 u32 num_safety_loop_vars{};
157
158 bool uses_y_direction{};
159 bool uses_cc_carry{};
160 bool uses_geometry_passthrough{};
161
162private:
163 void SetupExtensions();
164 void DefineConstantBuffers(Bindings& bindings);
165 void DefineStorageBuffers(Bindings& bindings);
166 void DefineGenericOutput(size_t index, u32 invocations);
167 void DefineHelperFunctions();
168 void DefineConstants();
169 std::string DefineGlobalMemoryFunctions();
170 void SetupImages(Bindings& bindings);
171 void SetupTextures(Bindings& bindings);
172};
173
174} // namespace Shader::Backend::GLSL
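(Editorial sketch, not part of the patch above.) The typed Add overloads rely on a convention where every instruction format string starts with "{}=", so when the destination variable turns out to be unused the leading three characters are skipped and only the expression statement is emitted. A self-contained sketch of that path (the helper name and example strings are invented):

#include <fmt/format.h>
#include <string>

std::string Emit(const std::string& var_def, const char* format_str, int arg) {
    if (var_def.empty()) {
        return fmt::format(fmt::runtime(format_str + 3), arg); // drop the leading "{}="
    }
    return fmt::format(fmt::runtime(format_str), var_def, arg);
}
// Emit("u32_5", "{}=ftou({});", 7) yields "u32_5=ftou(7);"
// Emit("",      "{}=ftou({});", 7) yields "ftou(7);"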
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
new file mode 100644
index 000000000..8a430d573
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -0,0 +1,252 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8#include <type_traits>
9
10#include "common/div_ceil.h"
11#include "common/settings.h"
12#include "shader_recompiler/backend/glsl/emit_context.h"
13#include "shader_recompiler/backend/glsl/emit_glsl.h"
14#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
15#include "shader_recompiler/frontend/ir/ir_emitter.h"
16
17namespace Shader::Backend::GLSL {
18namespace {
19template <class Func>
20struct FuncTraits {};
21
22template <class ReturnType_, class... Args>
23struct FuncTraits<ReturnType_ (*)(Args...)> {
24 using ReturnType = ReturnType_;
25
26 static constexpr size_t NUM_ARGS = sizeof...(Args);
27
28 template <size_t I>
29 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
30};
31
32template <auto func, typename... Args>
33void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
34 inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
35}
36
37template <typename ArgType>
38auto Arg(EmitContext& ctx, const IR::Value& arg) {
39 if constexpr (std::is_same_v<ArgType, std::string_view>) {
40 return ctx.var_alloc.Consume(arg);
41 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
42 return arg;
43 } else if constexpr (std::is_same_v<ArgType, u32>) {
44 return arg.U32();
45 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
46 return arg.Attribute();
47 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
48 return arg.Patch();
49 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
50 return arg.Reg();
51 }
52}
53
54template <auto func, bool is_first_arg_inst, size_t... I>
55void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
56 using Traits = FuncTraits<decltype(func)>;
57 if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
58 if constexpr (is_first_arg_inst) {
59 SetDefinition<func>(
60 ctx, inst, *inst,
61 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
62 } else {
63 SetDefinition<func>(
64 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
65 }
66 } else {
67 if constexpr (is_first_arg_inst) {
68 func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
69 } else {
70 func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
71 }
72 }
73}
74
75template <auto func>
76void Invoke(EmitContext& ctx, IR::Inst* inst) {
77 using Traits = FuncTraits<decltype(func)>;
78 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
79 if constexpr (Traits::NUM_ARGS == 1) {
80 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
81 } else {
82 using FirstArgType = typename Traits::template ArgType<1>;
83 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
84 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
85 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
86 }
87}
88
89void EmitInst(EmitContext& ctx, IR::Inst* inst) {
90 switch (inst->GetOpcode()) {
91#define OPCODE(name, result_type, ...) \
92 case IR::Opcode::name: \
93 return Invoke<&Emit##name>(ctx, inst);
94#include "shader_recompiler/frontend/ir/opcodes.inc"
95#undef OPCODE
96 }
97 throw LogicError("Invalid opcode {}", inst->GetOpcode());
98}
99
100bool IsReference(IR::Inst& inst) {
101 return inst.GetOpcode() == IR::Opcode::Reference;
102}
103
104void PrecolorInst(IR::Inst& phi) {
105    // Insert phi moves before references to avoid overwriting other phis
106 const size_t num_args{phi.NumArgs()};
107 for (size_t i = 0; i < num_args; ++i) {
108 IR::Block& phi_block{*phi.PhiBlock(i)};
109 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
110 IR::IREmitter ir{phi_block, it};
111 const IR::Value arg{phi.Arg(i)};
112 if (arg.IsImmediate()) {
113 ir.PhiMove(phi, arg);
114 } else {
115 ir.PhiMove(phi, IR::Value{arg.InstRecursive()});
116 }
117 }
118 for (size_t i = 0; i < num_args; ++i) {
119 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
120 }
121}
122
123void Precolor(const IR::Program& program) {
124 for (IR::Block* const block : program.blocks) {
125 for (IR::Inst& phi : block->Instructions()) {
126 if (!IR::IsPhi(phi)) {
127 break;
128 }
129 PrecolorInst(phi);
130 }
131 }
132}
133
134void EmitCode(EmitContext& ctx, const IR::Program& program) {
135 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
136 switch (node.type) {
137 case IR::AbstractSyntaxNode::Type::Block:
138 for (IR::Inst& inst : node.data.block->Instructions()) {
139 EmitInst(ctx, &inst);
140 }
141 break;
142 case IR::AbstractSyntaxNode::Type::If:
143 ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond));
144 break;
145 case IR::AbstractSyntaxNode::Type::EndIf:
146 ctx.Add("}}");
147 break;
148 case IR::AbstractSyntaxNode::Type::Break:
149 if (node.data.break_node.cond.IsImmediate()) {
150 if (node.data.break_node.cond.U1()) {
151 ctx.Add("break;");
152 }
153 } else {
154 ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond));
155 }
156 break;
157 case IR::AbstractSyntaxNode::Type::Return:
158 case IR::AbstractSyntaxNode::Type::Unreachable:
159 ctx.Add("return;");
160 break;
161 case IR::AbstractSyntaxNode::Type::Loop:
162 ctx.Add("for(;;){{");
163 break;
164 case IR::AbstractSyntaxNode::Type::Repeat:
165 if (Settings::values.disable_shader_loop_safety_checks) {
166 ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond));
167 } else {
168 ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++,
169 ctx.var_alloc.Consume(node.data.repeat.cond));
170 }
171 break;
172 default:
173 throw NotImplementedException("AbstractSyntaxNode Type {}", node.type);
174 }
175 }
176}
177
178std::string GlslVersionSpecifier(const EmitContext& ctx) {
179 if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) {
180 return " compatibility";
181 }
182 return "";
183}
184
185bool IsPreciseType(GlslVarType type) {
186 switch (type) {
187 case GlslVarType::PrecF32:
188 case GlslVarType::PrecF64:
189 return true;
190 default:
191 return false;
192 }
193}
194
195void DefineVariables(const EmitContext& ctx, std::string& header) {
196 for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) {
197 const auto type{static_cast<GlslVarType>(i)};
198 const auto& tracker{ctx.var_alloc.GetUseTracker(type)};
199 const auto type_name{ctx.var_alloc.GetGlslType(type)};
200 const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug};
201 const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""};
202 // Temps/return types that are never used are stored at index 0
203 if (tracker.uses_temp) {
204 header += fmt::format("{}{} t{}={}(0);", precise, type_name,
205 ctx.var_alloc.Representation(0, type), type_name);
206 }
207 for (u32 index = 0; index < tracker.num_used; ++index) {
208 header += fmt::format("{}{} {}={}(0);", precise, type_name,
209 ctx.var_alloc.Representation(index, type), type_name);
210 }
211 }
212 for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) {
213 header += fmt::format("int loop{}=0x2000;", i);
214 }
215}
216} // Anonymous namespace
217
218std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
219 Bindings& bindings) {
220 EmitContext ctx{program, bindings, profile, runtime_info};
221 Precolor(program);
222 EmitCode(ctx, program);
223 const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
224 ctx.header.insert(0, version);
225 if (program.shared_memory_size > 0) {
226 const auto requested_size{program.shared_memory_size};
227 const auto max_size{profile.gl_max_compute_smem_size};
228 const bool needs_clamp{requested_size > max_size};
229 if (needs_clamp) {
230 LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})",
231 requested_size, max_size);
232 }
233 const auto smem_size{needs_clamp ? max_size : requested_size};
234 ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U));
235 }
236 ctx.header += "void main(){\n";
237 if (program.local_memory_size > 0) {
238 ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U));
239 }
240 DefineVariables(ctx, ctx.header);
241 if (ctx.uses_cc_carry) {
242 ctx.header += "uint carry;";
243 }
244 if (program.info.uses_subgroup_shuffles) {
245 ctx.header += "bool shfl_in_bounds;";
246 }
247 ctx.code.insert(0, ctx.header);
248 ctx.code += '}';
249 return ctx.code;
250}
251
252} // namespace Shader::Backend::GLSL
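The function above stitches the final shader together: Precolor and EmitCode fill ctx.code, the "#version 450" line (plus " compatibility" when legacy varyings or the Y-direction query are used) is prepended to ctx.header, shared and local memory plus the allocator's temporaries are declared, and the completed header is inserted ahead of the body before the closing brace is appended. For a small compute shader the assembled output looks roughly like the sketch below; the array sizes and variable names are illustrative only, since they depend on the program being translated and on what the variable allocator hands out.

    #version 450
    ...extension, layout and binding declarations built by EmitContext...
    shared uint smem[1024];
    void main(){
    uint lmem[16];
    uint u0=uint(0);float f0=float(0);
    int loop0=0x2000;
    ...translated instruction stream (ctx.code)...
    }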
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h
new file mode 100644
index 000000000..20e5719e6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.h
@@ -0,0 +1,24 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include "shader_recompiler/backend/bindings.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/profile.h"
12#include "shader_recompiler/runtime_info.h"
13
14namespace Shader::Backend::GLSL {
15
16[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info,
17 IR::Program& program, Bindings& bindings);
18
19[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) {
20 Bindings binding;
21 return EmitGLSL(profile, {}, program, binding);
22}
23
24} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
new file mode 100644
index 000000000..772acc5a4
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -0,0 +1,418 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13constexpr char cas_loop[]{
14 "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"};
15
16void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
17 std::string_view value, std::string_view function) {
18 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
19 const std::string smem{fmt::format("smem[{}>>2]", offset)};
20 ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret);
21}
22
23void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
24 const IR::Value& offset, std::string_view value, std::string_view function) {
25 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
26 const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
27 ctx.var_alloc.Consume(offset))};
28 ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
29}
30
31void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
32 const IR::Value& offset, std::string_view value,
33 std::string_view function) {
34 const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
35 ctx.var_alloc.Consume(offset))};
36 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
37 ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
38 ctx.AddF32("{}=utof({});", inst, ret);
39}
40} // Anonymous namespace
41
42void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
43 std::string_view value) {
44 ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
45}
46
47void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
48 std::string_view value) {
49 const std::string u32_value{fmt::format("uint({})", value)};
50 SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32");
51}
52
53void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
54 std::string_view value) {
55 ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
56}
57
58void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
59 std::string_view value) {
60 const std::string u32_value{fmt::format("uint({})", value)};
61 SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32");
62}
63
64void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
65 std::string_view value) {
66 ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
67}
68
69void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
70 std::string_view value) {
71 SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement");
72}
73
74void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
75 std::string_view value) {
76 SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement");
77}
78
79void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
80 std::string_view value) {
81 ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
82}
83
84void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
85 std::string_view value) {
86 ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
87}
88
89void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
90 std::string_view value) {
91 ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
92}
93
94void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
95 std::string_view value) {
96 ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
97}
98
99void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
100 std::string_view value) {
101 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
102 ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
103 pointer_offset);
104 ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
105 pointer_offset, value, pointer_offset, value);
106}
107
108void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
109 const IR::Value& offset, std::string_view value) {
110 ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
111 ctx.var_alloc.Consume(offset), value);
112}
113
114void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
115 const IR::Value& offset, std::string_view value) {
116 const std::string u32_value{fmt::format("uint({})", value)};
117 SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32");
118}
119
120void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
121 const IR::Value& offset, std::string_view value) {
122 ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
123 ctx.var_alloc.Consume(offset), value);
124}
125
126void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
127 const IR::Value& offset, std::string_view value) {
128 const std::string u32_value{fmt::format("uint({})", value)};
129 SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32");
130}
131
132void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
133 const IR::Value& offset, std::string_view value) {
134 ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
135 ctx.var_alloc.Consume(offset), value);
136}
137
138void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
139 const IR::Value& offset, std::string_view value) {
140 SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement");
141}
142
143void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
144 const IR::Value& offset, std::string_view value) {
145 SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement");
146}
147
148void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
149 const IR::Value& offset, std::string_view value) {
150 ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
151 ctx.var_alloc.Consume(offset), value);
152}
153
154void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
155 const IR::Value& offset, std::string_view value) {
156 ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
157 ctx.var_alloc.Consume(offset), value);
158}
159
160void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
161 const IR::Value& offset, std::string_view value) {
162 ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
163 ctx.var_alloc.Consume(offset), value);
164}
165
166void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
167 const IR::Value& offset, std::string_view value) {
168 ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
169 ctx.var_alloc.Consume(offset), value);
170}
171
172void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
173 const IR::Value& offset, std::string_view value) {
174 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
175 ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
176 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
177 binding.U32(), ctx.var_alloc.Consume(offset));
178 ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
179 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
180 binding.U32(), ctx.var_alloc.Consume(offset), value);
181}
182
183void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
184 const IR::Value& offset, std::string_view value) {
185 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
186 ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
187 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
188 binding.U32(), ctx.var_alloc.Consume(offset));
189 ctx.Add("for(int i=0;i<2;++i){{ "
190 "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
191 ");}}",
192 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
193 binding.U32(), ctx.var_alloc.Consume(offset), value);
194}
195
196void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
197 const IR::Value& offset, std::string_view value) {
198 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
199 ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
200 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
201 binding.U32(), ctx.var_alloc.Consume(offset));
202 ctx.Add("for(int i=0;i<2;++i){{ "
203 "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}",
204 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
205 binding.U32(), ctx.var_alloc.Consume(offset), value);
206}
207
208void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
209 const IR::Value& offset, std::string_view value) {
210 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
211 ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
212 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
213 binding.U32(), ctx.var_alloc.Consume(offset));
214 ctx.Add("for(int i=0;i<2;++i){{ "
215 "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
216 ");}}",
217 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
218 binding.U32(), ctx.var_alloc.Consume(offset), value);
219}
220
221void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
222 const IR::Value& offset, std::string_view value) {
223 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic");
224 ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
225 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
226 binding.U32(), ctx.var_alloc.Consume(offset));
227 ctx.Add("for(int "
228 "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}"
229 "))[i]);}}",
230 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
231 binding.U32(), ctx.var_alloc.Consume(offset), value);
232}
233
234void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
235 const IR::Value& offset, std::string_view value) {
236 ctx.AddU64(
237 "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
238 "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
239 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
240 binding.U32(), ctx.var_alloc.Consume(offset), value);
241}
242
243void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
244 const IR::Value& offset, std::string_view value) {
245 ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
246 "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
247 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
248 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
249}
250
251void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
252 const IR::Value& offset, std::string_view value) {
253 ctx.AddU64(
254 "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
255 "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
256 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
257 binding.U32(), ctx.var_alloc.Consume(offset), value);
258}
259
260void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
261 const IR::Value& offset, std::string_view value) {
262 ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
263 "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
264 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
265 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
266}
267
268void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
269 const IR::Value& offset, std::string_view value) {
270 SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
271}
272
273void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
274 const IR::Value& offset, std::string_view value) {
275 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2");
276}
277
278void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
279 const IR::Value& offset, std::string_view value) {
280 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2");
281}
282
283void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
284 const IR::Value& offset, std::string_view value) {
285 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2");
286}
287
288void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
289 const IR::Value& offset, std::string_view value) {
290 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2");
291}
292
293void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
294 const IR::Value& offset, std::string_view value) {
295 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2");
296}
297
298void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
299 const IR::Value& offset, std::string_view value) {
300 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2");
301}
302
303void EmitGlobalAtomicIAdd32(EmitContext&) {
304 throw NotImplementedException("GLSL Instruction");
305}
306
307void EmitGlobalAtomicSMin32(EmitContext&) {
308 throw NotImplementedException("GLSL Instruction");
309}
310
311void EmitGlobalAtomicUMin32(EmitContext&) {
312 throw NotImplementedException("GLSL Instruction");
313}
314
315void EmitGlobalAtomicSMax32(EmitContext&) {
316 throw NotImplementedException("GLSL Instruction");
317}
318
319void EmitGlobalAtomicUMax32(EmitContext&) {
320 throw NotImplementedException("GLSL Instruction");
321}
322
323void EmitGlobalAtomicInc32(EmitContext&) {
324 throw NotImplementedException("GLSL Instruction");
325}
326
327void EmitGlobalAtomicDec32(EmitContext&) {
328 throw NotImplementedException("GLSL Instruction");
329}
330
331void EmitGlobalAtomicAnd32(EmitContext&) {
332 throw NotImplementedException("GLSL Instruction");
333}
334
335void EmitGlobalAtomicOr32(EmitContext&) {
336 throw NotImplementedException("GLSL Instruction");
337}
338
339void EmitGlobalAtomicXor32(EmitContext&) {
340 throw NotImplementedException("GLSL Instruction");
341}
342
343void EmitGlobalAtomicExchange32(EmitContext&) {
344 throw NotImplementedException("GLSL Instruction");
345}
346
347void EmitGlobalAtomicIAdd64(EmitContext&) {
348 throw NotImplementedException("GLSL Instruction");
349}
350
351void EmitGlobalAtomicSMin64(EmitContext&) {
352 throw NotImplementedException("GLSL Instruction");
353}
354
355void EmitGlobalAtomicUMin64(EmitContext&) {
356 throw NotImplementedException("GLSL Instruction");
357}
358
359void EmitGlobalAtomicSMax64(EmitContext&) {
360 throw NotImplementedException("GLSL Instruction");
361}
362
363void EmitGlobalAtomicUMax64(EmitContext&) {
364 throw NotImplementedException("GLSL Instruction");
365}
366
367void EmitGlobalAtomicInc64(EmitContext&) {
368 throw NotImplementedException("GLSL Instruction");
369}
370
371void EmitGlobalAtomicDec64(EmitContext&) {
372 throw NotImplementedException("GLSL Instruction");
373}
374
375void EmitGlobalAtomicAnd64(EmitContext&) {
376 throw NotImplementedException("GLSL Instruction");
377}
378
379void EmitGlobalAtomicOr64(EmitContext&) {
380 throw NotImplementedException("GLSL Instruction");
381}
382
383void EmitGlobalAtomicXor64(EmitContext&) {
384 throw NotImplementedException("GLSL Instruction");
385}
386
387void EmitGlobalAtomicExchange64(EmitContext&) {
388 throw NotImplementedException("GLSL Instruction");
389}
390
391void EmitGlobalAtomicAddF32(EmitContext&) {
392 throw NotImplementedException("GLSL Instruction");
393}
394
395void EmitGlobalAtomicAddF16x2(EmitContext&) {
396 throw NotImplementedException("GLSL Instruction");
397}
398
399void EmitGlobalAtomicAddF32x2(EmitContext&) {
400 throw NotImplementedException("GLSL Instruction");
401}
402
403void EmitGlobalAtomicMinF16x2(EmitContext&) {
404 throw NotImplementedException("GLSL Instruction");
405}
406
407void EmitGlobalAtomicMinF32x2(EmitContext&) {
408 throw NotImplementedException("GLSL Instruction");
409}
410
411void EmitGlobalAtomicMaxF16x2(EmitContext&) {
412 throw NotImplementedException("GLSL Instruction");
413}
414
415void EmitGlobalAtomicMaxF32x2(EmitContext&) {
416 throw NotImplementedException("GLSL Instruction");
417}
418} // namespace Shader::Backend::GLSL
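Every atomic in this file without a native GLSL counterpart is lowered through the cas_loop format string at the top: read the current value, have a helper combine it with the operand, and retry with atomicCompSwap until the swap succeeds. The CasMinS32/CasIncrement/CasFloatAdd... helpers are assumed here to be small functions emitted into the shader header elsewhere in the backend. For EmitSharedAtomicSMin32 with a placeholder offset variable off, value val, and result variable u0, the emitted GLSL comes out roughly as:

    for (;;){uint old=smem[off>>2];u0=atomicCompSwap(smem[off>>2],old,CasMinS32(smem[off>>2],uint(val)));if(u0==old){break;}}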
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
new file mode 100644
index 000000000..e1d1b558e
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
@@ -0,0 +1,21 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glsl/emit_context.h"
6#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLSL {
10void EmitBarrier(EmitContext& ctx) {
11 ctx.Add("barrier();");
12}
13
14void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
15 ctx.Add("groupMemoryBarrier();");
16}
17
18void EmitDeviceMemoryBarrier(EmitContext& ctx) {
19 ctx.Add("memoryBarrier();");
20}
21} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
new file mode 100644
index 000000000..3c1714e89
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13void Alias(IR::Inst& inst, const IR::Value& value) {
14 if (value.IsImmediate()) {
15 return;
16 }
17 IR::Inst& value_inst{*value.InstRecursive()};
18 value_inst.DestructiveAddUsage(inst.UseCount());
19 value_inst.DestructiveRemoveUsage();
20 inst.SetDefinition(value_inst.Definition<Id>());
21}
22} // Anonymous namespace
23
24void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
25 Alias(inst, value);
26}
27
28void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
29 // Fake one usage to get a real variable out of the condition
30 inst.DestructiveAddUsage(1);
31 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
32 const auto input{ctx.var_alloc.Consume(value)};
33 if (ret != input) {
34 ctx.Add("{}={};", ret, input);
35 }
36}
37
38void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
39 NotImplemented();
40}
41
42void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
43 ctx.AddU32("{}=ftou({});", inst, value);
44}
45
46void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
47 ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);
48}
49
50void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
51 NotImplemented();
52}
53
54void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
55 ctx.AddF32("{}=utof({});", inst, value);
56}
57
58void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
59 ctx.AddF64("{}=uint64BitsToDouble({});", inst, value);
60}
61
62void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
63 ctx.AddU64("{}=packUint2x32({});", inst, value);
64}
65
66void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
67 ctx.AddU32x2("{}=unpackUint2x32({});", inst, value);
68}
69
70void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
71 ctx.AddU32("{}=packFloat2x16({});", inst, value);
72}
73
74void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
75 ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value);
76}
77
78void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
79 ctx.AddU32("{}=packHalf2x16({});", inst, value);
80}
81
82void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
83 ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value);
84}
85
86void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
87 ctx.AddF64("{}=packDouble2x32({});", inst, value);
88}
89
90void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
91 ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value);
92}
93
94} // namespace Shader::Backend::GLSL
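The bit casts lean on ftou/utof and the 64-bit doubleBitsToUint64/uint64BitsToDouble pairs instead of calling floatBitsToUint and friends inline; the 32-bit helpers are presumably tiny wrappers emitted into the shader header by EmitContext, so the indirection is only a naming convention. Assuming that, a U32/F32 round trip reduces to plain reinterpreting assignments (placeholder names):

    u0=ftou(f1);
    f2=utof(u0);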
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
new file mode 100644
index 000000000..49a66e3ec
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
@@ -0,0 +1,219 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13constexpr std::string_view SWIZZLE{"xyzw"};
14void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite,
15 std::string_view object, u32 index) {
16 if (result == composite) {
17 // The result is aliased with the composite
18 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
19 } else {
20 ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object);
21 }
22}
23} // Anonymous namespace
24
25void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
26 std::string_view e2) {
27 ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2);
28}
29
30void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
31 std::string_view e2, std::string_view e3) {
32 ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3);
33}
34
35void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
36 std::string_view e2, std::string_view e3, std::string_view e4) {
37 ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4);
38}
39
40void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
41 u32 index) {
42 ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
43}
44
45void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
46 u32 index) {
47 ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
48}
49
50void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
51 u32 index) {
52 ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
53}
54
55void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
56 std::string_view object, u32 index) {
57 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
58 CompositeInsert(ctx, ret, composite, object, index);
59}
60
61void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
62 std::string_view object, u32 index) {
63 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)};
64 CompositeInsert(ctx, ret, composite, object, index);
65}
66
67void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
68 std::string_view object, u32 index) {
69 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)};
70 CompositeInsert(ctx, ret, composite, object, index);
71}
72
73void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx,
74 [[maybe_unused]] std::string_view e1,
75 [[maybe_unused]] std::string_view e2) {
76 NotImplemented();
77}
78
79void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx,
80 [[maybe_unused]] std::string_view e1,
81 [[maybe_unused]] std::string_view e2,
82 [[maybe_unused]] std::string_view e3) {
83 NotImplemented();
84}
85
86void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx,
87 [[maybe_unused]] std::string_view e1,
88 [[maybe_unused]] std::string_view e2,
89 [[maybe_unused]] std::string_view e3,
90 [[maybe_unused]] std::string_view e4) {
91 NotImplemented();
92}
93
94void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
95 [[maybe_unused]] std::string_view composite,
96 [[maybe_unused]] u32 index) {
97 NotImplemented();
98}
99
100void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
101 [[maybe_unused]] std::string_view composite,
102 [[maybe_unused]] u32 index) {
103 NotImplemented();
104}
105
106void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
107 [[maybe_unused]] std::string_view composite,
108 [[maybe_unused]] u32 index) {
109 NotImplemented();
110}
111
112void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
113 [[maybe_unused]] std::string_view composite,
114 [[maybe_unused]] std::string_view object,
115 [[maybe_unused]] u32 index) {
116 NotImplemented();
117}
118
119void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
120 [[maybe_unused]] std::string_view composite,
121 [[maybe_unused]] std::string_view object,
122 [[maybe_unused]] u32 index) {
123 NotImplemented();
124}
125
126void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
127 [[maybe_unused]] std::string_view composite,
128 [[maybe_unused]] std::string_view object,
129 [[maybe_unused]] u32 index) {
130 NotImplemented();
131}
132
133void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
134 std::string_view e2) {
135 ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2);
136}
137
138void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
139 std::string_view e2, std::string_view e3) {
140 ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3);
141}
142
143void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
144 std::string_view e2, std::string_view e3, std::string_view e4) {
145 ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4);
146}
147
148void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
149 u32 index) {
150 ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
151}
152
153void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
154 u32 index) {
155 ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
156}
157
158void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
159 u32 index) {
160 ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
161}
162
163void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
164 std::string_view object, u32 index) {
165 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)};
166 CompositeInsert(ctx, ret, composite, object, index);
167}
168
169void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
170 std::string_view object, u32 index) {
171 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)};
172 CompositeInsert(ctx, ret, composite, object, index);
173}
174
175void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
176 std::string_view object, u32 index) {
177 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
178 CompositeInsert(ctx, ret, composite, object, index);
179}
180
181void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
182 NotImplemented();
183}
184
185void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
186 NotImplemented();
187}
188
189void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
190 NotImplemented();
191}
192
193void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
194 NotImplemented();
195}
196
197void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
198 NotImplemented();
199}
200
201void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
202 NotImplemented();
203}
204
205void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
206 u32 index) {
207 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
208}
209
210void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
211 u32 index) {
212 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
213}
214
215void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
216 u32 index) {
217 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
218}
219} // namespace Shader::Backend::GLSL
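CompositeInsert spares a copy when the variable allocator hands back the same name for the result as for the source composite. With placeholder names, inserting f3 into component y of a vec4 emits the first form when the result aliases the composite and the second otherwise:

    f32x4_0.y=f3;
    f32x4_1=f32x4_0;f32x4_1.y=f3;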
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
new file mode 100644
index 000000000..580063fa9
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -0,0 +1,456 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11#include "shader_recompiler/runtime_info.h"
12
13namespace Shader::Backend::GLSL {
14namespace {
15constexpr char SWIZZLE[]{"xyzw"};
16
17u32 CbufIndex(u32 offset) {
18 return (offset / 4) % 4;
19}
20
21char OffsetSwizzle(u32 offset) {
22 return SWIZZLE[CbufIndex(offset)];
23}
24
25bool IsInputArray(Stage stage) {
26 return stage == Stage::Geometry || stage == Stage::TessellationControl ||
27 stage == Stage::TessellationEval;
28}
29
30std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
31 return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
32}
33
34std::string_view OutputVertexIndex(EmitContext& ctx) {
35 return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
36}
37
38void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
39 const IR::Value& offset, u32 num_bits, std::string_view cast = {},
40 std::string_view bit_offset = {}) {
41 const bool is_immediate{offset.IsImmediate()};
42 const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug};
43 if (is_immediate) {
44 const s32 signed_offset{static_cast<s32>(offset.U32())};
45 static constexpr u32 cbuf_size{0x10000};
46 if (signed_offset < 0 || offset.U32() > cbuf_size) {
47 LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
48 ctx.Add("{}=0u;", ret);
49 return;
50 }
51 }
52 const auto offset_var{ctx.var_alloc.Consume(offset)};
53 const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16)
54 : fmt::format("{}>>4", offset_var)};
55 const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
56 : fmt::format("[({}>>2)%4]", offset_var)};
57
58 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
59 const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
60 const auto extraction{num_bits == 32 ? cbuf_cast
61 : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
62 bit_offset, num_bits)};
63 if (!component_indexing_bug) {
64 const auto result{fmt::format(fmt::runtime(extraction), swizzle)};
65 ctx.Add("{}={};", ret, result);
66 return;
67 }
68 const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
69 for (u32 i = 0; i < 4; ++i) {
70 const auto swizzle_string{fmt::format(".{}", "xyzw"[i])};
71 const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)};
72 ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result);
73 }
74}
75
76void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
77 std::string_view cast) {
78 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
79 if (offset.IsImmediate()) {
80 const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)};
81 GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
82 } else {
83 const auto offset_var{ctx.var_alloc.Consume(offset)};
84 const auto bit_offset{fmt::format("({}%4)*8", offset_var)};
85 GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
86 }
87}
88
89void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
90 std::string_view cast) {
91 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
92 if (offset.IsImmediate()) {
93 const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)};
94 GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
95 } else {
96 const auto offset_var{ctx.var_alloc.Consume(offset)};
97 const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)};
98 GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
99 }
100}
101
102u32 TexCoordIndex(IR::Attribute attr) {
103 return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
104}
105} // Anonymous namespace
106
107void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
108 const IR::Value& offset) {
109 GetCbuf8(ctx, inst, binding, offset, "ftou");
110}
111
112void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
113 const IR::Value& offset) {
114 GetCbuf8(ctx, inst, binding, offset, "ftoi");
115}
116
117void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
118 const IR::Value& offset) {
119 GetCbuf16(ctx, inst, binding, offset, "ftou");
120}
121
122void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
123 const IR::Value& offset) {
124 GetCbuf16(ctx, inst, binding, offset, "ftoi");
125}
126
127void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
128 const IR::Value& offset) {
129 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
130 GetCbuf(ctx, ret, binding, offset, 32, "ftou");
131}
132
133void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
134 const IR::Value& offset) {
135 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
136 GetCbuf(ctx, ret, binding, offset, 32);
137}
138
139void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
140 const IR::Value& offset) {
141 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
142 if (offset.IsImmediate()) {
143 static constexpr u32 cbuf_size{0x10000};
144 const u32 u32_offset{offset.U32()};
145 const s32 signed_offset{static_cast<s32>(offset.U32())};
146 if (signed_offset < 0 || u32_offset > cbuf_size) {
147 LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
148 ctx.AddU32x2("{}=uvec2(0u);", inst);
149 return;
150 }
151 if (u32_offset % 2 == 0) {
152 ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
153 OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
154 } else {
155 ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
156 OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
157 OffsetSwizzle(u32_offset + 4));
158 }
159 return;
160 }
161 const auto offset_var{ctx.var_alloc.Consume(offset)};
162 if (!ctx.profile.has_gl_component_indexing_bug) {
163 ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
164 inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
165 return;
166 }
167 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
168 const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
169 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
170 ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
171 swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
172 "xyzw"[(swizzle + 1) % 4]);
173 }
174}
175
176void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
177 std::string_view vertex) {
178 const u32 element{static_cast<u32>(attr) % 4};
179 const char swizzle{"xyzw"[element]};
180 if (IR::IsGeneric(attr)) {
181 const u32 index{IR::GenericAttributeIndex(attr)};
182 if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
183 if (element == 3) {
184 ctx.AddF32("{}=1.f;", inst, attr);
185 } else {
186 ctx.AddF32("{}=0.f;", inst, attr);
187 }
188 return;
189 }
190 ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
191 return;
192 }
193 // GLSL only exposes 8 legacy texcoords
194 if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
195 LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
196 TexCoordIndex(attr));
197 ctx.AddF32("{}=0.f;", inst);
198 return;
199 }
200 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
201 const u32 index{TexCoordIndex(attr)};
202 ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle);
203 return;
204 }
205 switch (attr) {
206 case IR::Attribute::PrimitiveId:
207 ctx.AddF32("{}=itof(gl_PrimitiveID);", inst);
208 break;
209 case IR::Attribute::PositionX:
210 case IR::Attribute::PositionY:
211 case IR::Attribute::PositionZ:
212 case IR::Attribute::PositionW: {
213 const bool is_array{IsInputArray(ctx.stage)};
214 const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""};
215 ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle);
216 break;
217 }
218 case IR::Attribute::ColorFrontDiffuseR:
219 case IR::Attribute::ColorFrontDiffuseG:
220 case IR::Attribute::ColorFrontDiffuseB:
221 case IR::Attribute::ColorFrontDiffuseA:
222 if (ctx.stage == Stage::Fragment) {
223 ctx.AddF32("{}=gl_Color.{};", inst, swizzle);
224 } else {
225 ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle);
226 }
227 break;
228 case IR::Attribute::PointSpriteS:
229 case IR::Attribute::PointSpriteT:
230 ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle);
231 break;
232 case IR::Attribute::TessellationEvaluationPointU:
233 case IR::Attribute::TessellationEvaluationPointV:
234 ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle);
235 break;
236 case IR::Attribute::InstanceId:
237 ctx.AddF32("{}=itof(gl_InstanceID);", inst);
238 break;
239 case IR::Attribute::VertexId:
240 ctx.AddF32("{}=itof(gl_VertexID);", inst);
241 break;
242 case IR::Attribute::FrontFace:
243 ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
244 break;
245 default:
246 throw NotImplementedException("Get attribute {}", attr);
247 }
248}
249
250void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
251 [[maybe_unused]] std::string_view vertex) {
252 if (IR::IsGeneric(attr)) {
253 const u32 index{IR::GenericAttributeIndex(attr)};
254 const u32 attr_element{IR::GenericAttributeElement(attr)};
255 const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
256 const auto output_decorator{OutputVertexIndex(ctx)};
257 if (info.num_components == 1) {
258 ctx.Add("{}{}={};", info.name, output_decorator, value);
259 } else {
260 const u32 index_element{attr_element - info.first_element};
261 ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value);
262 }
263 return;
264 }
265 const u32 element{static_cast<u32>(attr) % 4};
266 const char swizzle{"xyzw"[element]};
267 // GLSL only exposes 8 legacy texcoords
268 if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
269 LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
270 TexCoordIndex(attr));
271 return;
272 }
273 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
274 const u32 index{TexCoordIndex(attr)};
275 ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value);
276 return;
277 }
278 switch (attr) {
279 case IR::Attribute::Layer:
280 if (ctx.stage != Stage::Geometry &&
281 !ctx.profile.support_viewport_index_layer_non_geometry) {
282 LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support "
283 "viewport layer extension");
284 break;
285 }
286 ctx.Add("gl_Layer=ftoi({});", value);
287 break;
288 case IR::Attribute::ViewportIndex:
289 if (ctx.stage != Stage::Geometry &&
290 !ctx.profile.support_viewport_index_layer_non_geometry) {
291 LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support "
292 "viewport layer extension");
293 break;
294 }
295 ctx.Add("gl_ViewportIndex=ftoi({});", value);
296 break;
297 case IR::Attribute::ViewportMask:
298 if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) {
299 LOG_WARNING(
300 Shader_GLSL,
301 "Shader stores viewport mask but device does not support viewport mask extension");
302 break;
303 }
304 ctx.Add("gl_ViewportMask[0]=ftoi({});", value);
305 break;
306 case IR::Attribute::PointSize:
307 ctx.Add("gl_PointSize={};", value);
308 break;
309 case IR::Attribute::PositionX:
310 case IR::Attribute::PositionY:
311 case IR::Attribute::PositionZ:
312 case IR::Attribute::PositionW:
313 ctx.Add("gl_Position.{}={};", swizzle, value);
314 break;
315 case IR::Attribute::ColorFrontDiffuseR:
316 case IR::Attribute::ColorFrontDiffuseG:
317 case IR::Attribute::ColorFrontDiffuseB:
318 case IR::Attribute::ColorFrontDiffuseA:
319 ctx.Add("gl_FrontColor.{}={};", swizzle, value);
320 break;
321 case IR::Attribute::ColorFrontSpecularR:
322 case IR::Attribute::ColorFrontSpecularG:
323 case IR::Attribute::ColorFrontSpecularB:
324 case IR::Attribute::ColorFrontSpecularA:
325 ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value);
326 break;
327 case IR::Attribute::ColorBackDiffuseR:
328 case IR::Attribute::ColorBackDiffuseG:
329 case IR::Attribute::ColorBackDiffuseB:
330 case IR::Attribute::ColorBackDiffuseA:
331 ctx.Add("gl_BackColor.{}={};", swizzle, value);
332 break;
333 case IR::Attribute::ColorBackSpecularR:
334 case IR::Attribute::ColorBackSpecularG:
335 case IR::Attribute::ColorBackSpecularB:
336 case IR::Attribute::ColorBackSpecularA:
337 ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
338 break;
339 case IR::Attribute::FogCoordinate:
340 ctx.Add("gl_FogFragCoord={};", value);
341 break;
342 case IR::Attribute::ClipDistance0:
343 case IR::Attribute::ClipDistance1:
344 case IR::Attribute::ClipDistance2:
345 case IR::Attribute::ClipDistance3:
346 case IR::Attribute::ClipDistance4:
347 case IR::Attribute::ClipDistance5:
348 case IR::Attribute::ClipDistance6:
349 case IR::Attribute::ClipDistance7: {
350 const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
351 ctx.Add("gl_ClipDistance[{}]={};", index, value);
352 break;
353 }
354 default:
355 throw NotImplementedException("Set attribute {}", attr);
356 }
357}
358
359void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
360 std::string_view vertex) {
361 const bool is_array{ctx.stage == Stage::Geometry};
362 const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""};
363 ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg);
364}
365
366void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx,
367 [[maybe_unused]] std::string_view offset,
368 [[maybe_unused]] std::string_view value,
369 [[maybe_unused]] std::string_view vertex) {
370 NotImplemented();
371}
372
373void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
374 if (!IR::IsGeneric(patch)) {
375 throw NotImplementedException("Non-generic patch load");
376 }
377 const u32 index{IR::GenericPatchIndex(patch)};
378 const u32 element{IR::GenericPatchElement(patch)};
379 const char swizzle{"xyzw"[element]};
380 ctx.AddF32("{}=patch{}.{};", inst, index, swizzle);
381}
382
383void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) {
384 if (IR::IsGeneric(patch)) {
385 const u32 index{IR::GenericPatchIndex(patch)};
386 const u32 element{IR::GenericPatchElement(patch)};
387 ctx.Add("patch{}.{}={};", index, "xyzw"[element], value);
388 return;
389 }
390 switch (patch) {
391 case IR::Patch::TessellationLodLeft:
392 case IR::Patch::TessellationLodRight:
393 case IR::Patch::TessellationLodTop:
394 case IR::Patch::TessellationLodBottom: {
395 const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
396 ctx.Add("gl_TessLevelOuter[{}]={};", index, value);
397 break;
398 }
399 case IR::Patch::TessellationLodInteriorU:
400 ctx.Add("gl_TessLevelInner[0]={};", value);
401 break;
402 case IR::Patch::TessellationLodInteriorV:
403 ctx.Add("gl_TessLevelInner[1]={};", value);
404 break;
405 default:
406 throw NotImplementedException("Patch {}", patch);
407 }
408}
409
410void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) {
411 const char swizzle{"xyzw"[component]};
412 ctx.Add("frag_color{}.{}={};", index, swizzle, value);
413}
414
415void EmitSetSampleMask(EmitContext& ctx, std::string_view value) {
416 ctx.Add("gl_SampleMask[0]=int({});", value);
417}
418
419void EmitSetFragDepth(EmitContext& ctx, std::string_view value) {
420 ctx.Add("gl_FragDepth={};", value);
421}
422
423void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
424 ctx.AddU32x3("{}=gl_LocalInvocationID;", inst);
425}
426
427void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
428 ctx.AddU32x3("{}=gl_WorkGroupID;", inst);
429}
430
431void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
432 ctx.AddU32("{}=uint(gl_InvocationID);", inst);
433}
434
435void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
436 ctx.AddU32("{}=uint(gl_SampleID);", inst);
437}
438
439void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
440 ctx.AddU1("{}=gl_HelperInvocation;", inst);
441}
442
443void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
444 ctx.uses_y_direction = true;
445 ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
446}
447
448void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
449 ctx.AddU32("{}=lmem[{}];", inst, word_offset);
450}
451
452void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) {
453 ctx.Add("lmem[{}]={};", word_offset, value);
454}
455
456} // namespace Shader::Backend::GLSL
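GetCbuf treats each constant buffer as an array of vec4s: an immediate offset selects the vector with offset/16 and the component with (offset/4)%4, a dynamic offset computes both at runtime, and on drivers with the component-indexing bug the component select is unrolled into four guarded assignments. A 32-bit unsigned load at immediate offset 0x30 from binding 0, and the same load with a runtime offset held in off, would come out roughly as follows in a fragment shader (stage prefix and names illustrative):

    u0=ftou(fs_cbuf0[3].x);
    u0=ftou(fs_cbuf0[off>>4][(off>>2)%4]);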
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
new file mode 100644
index 000000000..53f8896be
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
@@ -0,0 +1,21 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/exception.h"
10
11namespace Shader::Backend::GLSL {
12
13void EmitJoin(EmitContext&) {
14 throw NotImplementedException("Join shouldn't be emitted");
15}
16
17void EmitDemoteToHelperInvocation(EmitContext& ctx) {
18 ctx.Add("discard;");
19}
20
21} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
new file mode 100644
index 000000000..eeae6562c
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
@@ -0,0 +1,230 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
13 [[maybe_unused]] std::string_view value) {
14 NotImplemented();
15}
16
17void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
18 ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value);
19}
20
21void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
22 [[maybe_unused]] std::string_view value) {
23 NotImplemented();
24}
25
26void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
27 [[maybe_unused]] std::string_view value) {
28 NotImplemented();
29}
30
31void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
32 ctx.AddU32("{}=int({});", inst, value);
33}
34
35void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
36 ctx.AddU32("{}=int({});", inst, value);
37}
38
39void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
40 [[maybe_unused]] std::string_view value) {
41 NotImplemented();
42}
43
44void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
45 ctx.AddU64("{}=int64_t({});", inst, value);
46}
47
48void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
49 ctx.AddU64("{}=int64_t({});", inst, value);
50}
51
52void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
53 [[maybe_unused]] std::string_view value) {
54 NotImplemented();
55}
56
57void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
58 [[maybe_unused]] std::string_view value) {
59 NotImplemented();
60}
61
62void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
63 [[maybe_unused]] std::string_view value) {
64 NotImplemented();
65}
66
67void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
68 [[maybe_unused]] std::string_view value) {
69 NotImplemented();
70}
71
72void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
73 ctx.AddU32("{}=uint({});", inst, value);
74}
75
76void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
77 ctx.AddU32("{}=uint({});", inst, value);
78}
79
80void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
81 [[maybe_unused]] std::string_view value) {
82 NotImplemented();
83}
84
85void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
86 ctx.AddU64("{}=uint64_t({});", inst, value);
87}
88
89void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
90 ctx.AddU64("{}=uint64_t({});", inst, value);
91}
92
93void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
94 ctx.AddU64("{}=uint64_t({});", inst, value);
95}
96
97void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
98 ctx.AddU32("{}=uint({});", inst, value);
99}
100
101void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
102 [[maybe_unused]] std::string_view value) {
103 NotImplemented();
104}
105
106void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
107 [[maybe_unused]] std::string_view value) {
108 NotImplemented();
109}
110
111void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
112 ctx.AddF32("{}=float({});", inst, value);
113}
114
115void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
116 ctx.AddF64("{}=double({});", inst, value);
117}
118
119void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
120 [[maybe_unused]] std::string_view value) {
121 NotImplemented();
122}
123
124void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
125 [[maybe_unused]] std::string_view value) {
126 NotImplemented();
127}
128
129void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
130 [[maybe_unused]] std::string_view value) {
131 NotImplemented();
132}
133
134void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
135 [[maybe_unused]] std::string_view value) {
136 NotImplemented();
137}
138
139void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
140 [[maybe_unused]] std::string_view value) {
141 NotImplemented();
142}
143
144void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
145 [[maybe_unused]] std::string_view value) {
146 NotImplemented();
147}
148
149void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
150 [[maybe_unused]] std::string_view value) {
151 NotImplemented();
152}
153
154void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
155 [[maybe_unused]] std::string_view value) {
156 NotImplemented();
157}
158
159void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
160 [[maybe_unused]] std::string_view value) {
161 NotImplemented();
162}
163
164void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
165 [[maybe_unused]] std::string_view value) {
166 NotImplemented();
167}
168
169void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
170 ctx.AddF32("{}=float(int({}));", inst, value);
171}
172
173void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
174 ctx.AddF32("{}=float(int64_t({}));", inst, value);
175}
176
177void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
178 [[maybe_unused]] std::string_view value) {
179 NotImplemented();
180}
181
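// The source is a 16-bit value stored in a 32-bit register; mask the low 16 bits before converting.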
182void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
183 ctx.AddF32("{}=float({}&0xffff);", inst, value);
184}
185
186void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
187 ctx.AddF32("{}=float({});", inst, value);
188}
189
190void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
191 ctx.AddF32("{}=float({});", inst, value);
192}
193
194void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
195 [[maybe_unused]] std::string_view value) {
196 NotImplemented();
197}
198
199void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
200 [[maybe_unused]] std::string_view value) {
201 NotImplemented();
202}
203
204void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
205 ctx.AddF64("{}=double(int({}));", inst, value);
206}
207
208void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
209 ctx.AddF64("{}=double(int64_t({}));", inst, value);
210}
211
212void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
213 [[maybe_unused]] std::string_view value) {
214 NotImplemented();
215}
216
217void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
218 [[maybe_unused]] std::string_view value) {
219 NotImplemented();
220}
221
222void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
223 ctx.AddF64("{}=double({});", inst, value);
224}
225
226void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
227 ctx.AddF64("{}=double({});", inst, value);
228}
229
230} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
new file mode 100644
index 000000000..d423bfb1b
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -0,0 +1,456 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
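// Emits a floating-point comparison. Ordered comparisons additionally require both operands to be
// non-NaN ("a op b && !isnan(a) && !isnan(b)"); unordered comparisons also hold when either
// operand is NaN ("a op b || isnan(a) || isnan(b)").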
14void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs,
15 std::string_view op, bool ordered) {
16 const auto nan_op{ordered ? "&&!" : "||"};
17 ctx.AddU1("{}={}{}{}"
18 "{}isnan({}){}isnan({});",
19 inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs);
20}
21
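// no_contraction marks operations that must not be reassociated or fused; they are emitted through
// the AddPrec* helpers below.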
22bool IsPrecise(const IR::Inst& inst) {
23 return inst.Flags<IR::FpControl>().no_contraction;
24}
25} // Anonymous namespace
26
27void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
28 [[maybe_unused]] std::string_view value) {
29 NotImplemented();
30}
31
32void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
33 ctx.AddF32("{}=abs({});", inst, value);
34}
35
36void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
37 ctx.AddF64("{}=abs({});", inst, value);
38}
39
40void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
41 [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
42 NotImplemented();
43}
44
45void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
46 if (IsPrecise(inst)) {
47 ctx.AddPrecF32("{}={}+{};", inst, a, b);
48 } else {
49 ctx.AddF32("{}={}+{};", inst, a, b);
50 }
51}
52
53void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
54 if (IsPrecise(inst)) {
55 ctx.AddPrecF64("{}={}+{};", inst, a, b);
56 } else {
57 ctx.AddF64("{}={}+{};", inst, a, b);
58 }
59}
60
61void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
62 [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b,
63 [[maybe_unused]] std::string_view c) {
64 NotImplemented();
65}
66
67void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
68 std::string_view c) {
69 if (IsPrecise(inst)) {
70 ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c);
71 } else {
72 ctx.AddF32("{}=fma({},{},{});", inst, a, b, c);
73 }
74}
75
76void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
77 std::string_view c) {
78 if (IsPrecise(inst)) {
79 ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c);
80 } else {
81 ctx.AddF64("{}=fma({},{},{});", inst, a, b, c);
82 }
83}
84
85void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
86 ctx.AddF32("{}=max({},{});", inst, a, b);
87}
88
89void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
90 ctx.AddF64("{}=max({},{});", inst, a, b);
91}
92
93void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
94 ctx.AddF32("{}=min({},{});", inst, a, b);
95}
96
97void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
98 ctx.AddF64("{}=min({},{});", inst, a, b);
99}
100
101void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
102 [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
103 NotImplemented();
104}
105
106void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
107 if (IsPrecise(inst)) {
108 ctx.AddPrecF32("{}={}*{};", inst, a, b);
109 } else {
110 ctx.AddF32("{}={}*{};", inst, a, b);
111 }
112}
113
114void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
115 if (IsPrecise(inst)) {
116 ctx.AddPrecF64("{}={}*{};", inst, a, b);
117 } else {
118 ctx.AddF64("{}={}*{};", inst, a, b);
119 }
120}
121
122void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
123 [[maybe_unused]] std::string_view value) {
124 NotImplemented();
125}
126
127void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
128 ctx.AddF32("{}=-({});", inst, value);
129}
130
131void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
132 ctx.AddF64("{}=-({});", inst, value);
133}
134
135void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
136 ctx.AddF32("{}=sin({});", inst, value);
137}
138
139void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
140 ctx.AddF32("{}=cos({});", inst, value);
141}
142
143void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
144 ctx.AddF32("{}=exp2({});", inst, value);
145}
146
147void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
148 ctx.AddF32("{}=log2({});", inst, value);
149}
150
151void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
152 ctx.AddF32("{}=(1.0f)/{};", inst, value);
153}
154
155void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
156 ctx.AddF64("{}=1.0/{};", inst, value);
157}
158
159void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
160 [[maybe_unused]] std::string_view value) {
161 ctx.AddF32("{}=inversesqrt({});", inst, value);
162}
163
164void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
165 [[maybe_unused]] std::string_view value) {
166 NotImplemented();
167}
168
169void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
170 ctx.AddF32("{}=sqrt({});", inst, value);
171}
172
173void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
174 [[maybe_unused]] std::string_view value) {
175 NotImplemented();
176}
177
178void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
179 ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value);
180}
181
182void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
183 ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value);
184}
185
186void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
187 [[maybe_unused]] std::string_view value,
188 [[maybe_unused]] std::string_view min_value,
189 [[maybe_unused]] std::string_view max_value) {
190 NotImplemented();
191}
192
193void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
194 std::string_view min_value, std::string_view max_value) {
195 // GLSL's clamp does not produce the desired results here (its behavior with NaN operands is undefined); use an explicit min/max chain instead
196 ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value);
197}
198
199void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
200 std::string_view min_value, std::string_view max_value) {
201 // GLSL's clamp does not produce the desired results here (its behavior with NaN operands is undefined); use an explicit min/max chain instead
202 ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value);
203}
204
205void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
206 [[maybe_unused]] std::string_view value) {
207 NotImplemented();
208}
209
210void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
211 ctx.AddF32("{}=roundEven({});", inst, value);
212}
213
214void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
215 ctx.AddF64("{}=roundEven({});", inst, value);
216}
217
218void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
219 [[maybe_unused]] std::string_view value) {
220 NotImplemented();
221}
222
223void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
224 ctx.AddF32("{}=floor({});", inst, value);
225}
226
227void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
228 ctx.AddF64("{}=floor({});", inst, value);
229}
230
231void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
232 [[maybe_unused]] std::string_view value) {
233 NotImplemented();
234}
235
236void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
237 ctx.AddF32("{}=ceil({});", inst, value);
238}
239
240void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
241 ctx.AddF64("{}=ceil({});", inst, value);
242}
243
244void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
245 [[maybe_unused]] std::string_view value) {
246 NotImplemented();
247}
248
249void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
250 ctx.AddF32("{}=trunc({});", inst, value);
251}
252
253void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
254 ctx.AddF64("{}=trunc({});", inst, value);
255}
256
257void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
258 [[maybe_unused]] std::string_view rhs) {
259 NotImplemented();
260}
261
262void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
263 std::string_view rhs) {
264 Compare(ctx, inst, lhs, rhs, "==", true);
265}
266
267void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
268 std::string_view rhs) {
269 Compare(ctx, inst, lhs, rhs, "==", true);
270}
271
272void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
273 [[maybe_unused]] std::string_view rhs) {
274 NotImplemented();
275}
276
277void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
278 std::string_view rhs) {
279 Compare(ctx, inst, lhs, rhs, "==", false);
280}
281
282void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
283 std::string_view rhs) {
284 Compare(ctx, inst, lhs, rhs, "==", false);
285}
286
287void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
288 [[maybe_unused]] std::string_view rhs) {
289 NotImplemented();
290}
291
292void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
293 std::string_view rhs) {
294 Compare(ctx, inst, lhs, rhs, "!=", true);
295}
296
297void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
298 std::string_view rhs) {
299 Compare(ctx, inst, lhs, rhs, "!=", true);
300}
301
302void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
303 [[maybe_unused]] std::string_view rhs) {
304 NotImplemented();
305}
306
307void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
308 std::string_view rhs) {
309 Compare(ctx, inst, lhs, rhs, "!=", false);
310}
311
312void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
313 std::string_view rhs) {
314 Compare(ctx, inst, lhs, rhs, "!=", false);
315}
316
317void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
318 [[maybe_unused]] std::string_view rhs) {
319 NotImplemented();
320}
321
322void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
323 std::string_view rhs) {
324 Compare(ctx, inst, lhs, rhs, "<", true);
325}
326
327void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
328 std::string_view rhs) {
329 Compare(ctx, inst, lhs, rhs, "<", true);
330}
331
332void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
333 [[maybe_unused]] std::string_view rhs) {
334 NotImplemented();
335}
336
337void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
338 std::string_view rhs) {
339 Compare(ctx, inst, lhs, rhs, "<", false);
340}
341
342void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
343 std::string_view rhs) {
344 Compare(ctx, inst, lhs, rhs, "<", false);
345}
346
347void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx,
348 [[maybe_unused]] std::string_view lhs,
349 [[maybe_unused]] std::string_view rhs) {
350 NotImplemented();
351}
352
353void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
354 std::string_view rhs) {
355 Compare(ctx, inst, lhs, rhs, ">", true);
356}
357
358void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
359 std::string_view rhs) {
360 Compare(ctx, inst, lhs, rhs, ">", true);
361}
362
363void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx,
364 [[maybe_unused]] std::string_view lhs,
365 [[maybe_unused]] std::string_view rhs) {
366 NotImplemented();
367}
368
369void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
370 std::string_view rhs) {
371 Compare(ctx, inst, lhs, rhs, ">", false);
372}
373
374void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
375 std::string_view rhs) {
376 Compare(ctx, inst, lhs, rhs, ">", false);
377}
378
379void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx,
380 [[maybe_unused]] std::string_view lhs,
381 [[maybe_unused]] std::string_view rhs) {
382 NotImplemented();
383}
384
385void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
386 std::string_view rhs) {
387 Compare(ctx, inst, lhs, rhs, "<=", true);
388}
389
390void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
391 std::string_view rhs) {
392 Compare(ctx, inst, lhs, rhs, "<=", true);
393}
394
395void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx,
396 [[maybe_unused]] std::string_view lhs,
397 [[maybe_unused]] std::string_view rhs) {
398 NotImplemented();
399}
400
401void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
402 std::string_view rhs) {
403 Compare(ctx, inst, lhs, rhs, "<=", false);
404}
405
406void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
407 std::string_view rhs) {
408 Compare(ctx, inst, lhs, rhs, "<=", false);
409}
410
411void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
412 [[maybe_unused]] std::string_view lhs,
413 [[maybe_unused]] std::string_view rhs) {
414 NotImplemented();
415}
416
417void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
418 std::string_view rhs) {
419 Compare(ctx, inst, lhs, rhs, ">=", true);
420}
421
422void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
423 std::string_view rhs) {
424 Compare(ctx, inst, lhs, rhs, ">=", true);
425}
426
427void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
428 [[maybe_unused]] std::string_view lhs,
429 [[maybe_unused]] std::string_view rhs) {
430 NotImplemented();
431}
432
433void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
434 std::string_view rhs) {
435 Compare(ctx, inst, lhs, rhs, ">=", false);
436}
437
438void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
439 std::string_view rhs) {
440 Compare(ctx, inst, lhs, rhs, ">=", false);
441}
442
443void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
444 [[maybe_unused]] std::string_view value) {
445 NotImplemented();
446}
447
448void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
449 ctx.AddU1("{}=isnan({});", inst, value);
450}
451
452void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
453 ctx.AddU1("{}=isnan({});", inst, value);
454}
455
456} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
new file mode 100644
index 000000000..447eb8e0a
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -0,0 +1,799 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/profile.h"
12
13namespace Shader::Backend::GLSL {
14namespace {
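// Returns the GLSL identifier of the bound texture descriptor, appending an array index when the
// binding holds more than one descriptor. Image() does the same for image descriptors.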
15std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
16 const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index)
17 : ctx.textures.at(info.descriptor_index)};
18 const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
19 return fmt::format("tex{}{}", def.binding, index_offset);
20}
21
22std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
23 const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index)
24 : ctx.images.at(info.descriptor_index)};
25 const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
26 return fmt::format("img{}{}", def.binding, index_offset);
27}
28
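// Casts coordinates or offsets to the signed integer vector width expected for the texture type.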
29std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) {
30 switch (info.type) {
31 case TextureType::Color1D:
32 case TextureType::Buffer:
33 return fmt::format("int({})", value);
34 case TextureType::ColorArray1D:
35 case TextureType::Color2D:
36 case TextureType::ColorArray2D:
37 return fmt::format("ivec2({})", value);
38 case TextureType::Color3D:
39 case TextureType::ColorCube:
40 return fmt::format("ivec3({})", value);
41 case TextureType::ColorArrayCube:
42 return fmt::format("ivec4({})", value);
43 default:
44 throw NotImplementedException("Integer cast for TextureType {}", info.type.Value());
45 }
46}
47
48std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) {
49 switch (info.type) {
50 case TextureType::Color1D:
51 case TextureType::Buffer:
52 return fmt::format("int({})", value);
53 case TextureType::ColorArray1D:
54 case TextureType::Color2D:
55 return fmt::format("ivec2({})", value);
56 case TextureType::ColorArray2D:
57 case TextureType::Color3D:
58 case TextureType::ColorCube:
59 return fmt::format("ivec3({})", value);
60 case TextureType::ColorArrayCube:
61 return fmt::format("ivec4({})", value);
62 default:
63 throw NotImplementedException("TexelFetchCast type {}", info.type.Value());
64 }
65}
66
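// Shadow lookups on these texture types need GL_EXT_texture_shadow_lod for explicit-LOD variants.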
67bool NeedsShadowLodExt(TextureType type) {
68 switch (type) {
69 case TextureType::ColorArray2D:
70 case TextureType::ColorCube:
71 case TextureType::ColorArrayCube:
72 return true;
73 default:
74 return false;
75 }
76}
77
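// Formats a texture offset as an ivecN expression. Offsets built entirely from immediates are
// folded into constants; runtime offsets require variable-offset support and are stubbed to 0
// otherwise.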
78std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) {
79 if (offset.IsImmediate()) {
80 return fmt::format("int({})", offset.U32());
81 }
82 IR::Inst* const inst{offset.InstRecursive()};
83 if (inst->AreAllArgsImmediates()) {
84 switch (inst->GetOpcode()) {
85 case IR::Opcode::CompositeConstructU32x2:
86 return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32());
87 case IR::Opcode::CompositeConstructU32x3:
88 return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
89 inst->Arg(2).U32());
90 case IR::Opcode::CompositeConstructU32x4:
91 return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
92 inst->Arg(2).U32(), inst->Arg(3).U32());
93 default:
94 break;
95 }
96 }
97 const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi};
98 if (!has_var_aoffi) {
99 LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING");
100 }
101 const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"};
102 switch (offset.Type()) {
103 case IR::Type::U32:
104 return fmt::format("int({})", offset_str);
105 case IR::Type::U32x2:
106 return fmt::format("ivec2({})", offset_str);
107 case IR::Type::U32x3:
108 return fmt::format("ivec3({})", offset_str);
109 case IR::Type::U32x4:
110 return fmt::format("ivec4({})", offset_str);
111 default:
112 throw NotImplementedException("Offset type {}", offset.Type());
113 }
114}
115
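// Builds the ivec2[4] array consumed by textureGatherOffsets. All four offsets must be
// compile-time immediates; otherwise a placeholder array is emitted.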
116std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) {
117 const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
118 if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
119 LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING");
120 return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))";
121 }
122 const IR::Opcode opcode{values[0]->GetOpcode()};
123 if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
124 throw LogicError("Invalid PTP arguments");
125 }
126 auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
127
128 return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0),
129 read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2),
130 read(1, 3));
131}
132
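// Returns the GetSparseFromOp pseudo-instruction associated with this texture instruction, if any,
// and invalidates it so the emitter can write the residency result directly.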
133IR::Inst* PrepareSparse(IR::Inst& inst) {
134 const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
135 if (sparse_inst) {
136 sparse_inst->Invalidate();
137 }
138 return sparse_inst;
139}
140} // Anonymous namespace
141
142void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
143 std::string_view coords, std::string_view bias_lc,
144 const IR::Value& offset) {
145 const auto info{inst.Flags<IR::TextureInstInfo>()};
146 if (info.has_lod_clamp) {
147 throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples");
148 }
149 const auto texture{Texture(ctx, info, index)};
150 const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
151 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
152 const auto sparse_inst{PrepareSparse(inst)};
153 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
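    // Without sparse texture support, residency is reported as true and the regular sampling path
    // below is used instead.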
154 if (sparse_inst && !supports_sparse) {
155 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
156 ctx.AddU1("{}=true;", *sparse_inst);
157 }
158 if (!sparse_inst || !supports_sparse) {
159 if (!offset.IsEmpty()) {
160 const auto offset_str{GetOffsetVec(ctx, offset)};
161 if (ctx.stage == Stage::Fragment) {
162 ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias);
163 } else {
164 ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str);
165 }
166 } else {
167 if (ctx.stage == Stage::Fragment) {
168 ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias);
169 } else {
170 ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords);
171 }
172 }
173 return;
174 }
175 if (!offset.IsEmpty()) {
176 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));",
177 *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias);
178 } else {
179 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst,
180 texture, coords, texel, bias);
181 }
182}
183
184void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
185 std::string_view coords, std::string_view lod_lc,
186 const IR::Value& offset) {
187 const auto info{inst.Flags<IR::TextureInstInfo>()};
188 if (info.has_bias) {
189 throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples");
190 }
191 if (info.has_lod_clamp) {
192 throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples");
193 }
194 const auto texture{Texture(ctx, info, index)};
195 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
196 const auto sparse_inst{PrepareSparse(inst)};
197 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
198 if (sparse_inst && !supports_sparse) {
199 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
200 ctx.AddU1("{}=true;", *sparse_inst);
201 }
202 if (!sparse_inst || !supports_sparse) {
203 if (!offset.IsEmpty()) {
204 ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc,
205 GetOffsetVec(ctx, offset));
206 } else {
207 ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc);
208 }
209 return;
210 }
211 if (!offset.IsEmpty()) {
212 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
213 *sparse_inst, texture, CastToIntVec(coords, info), lod_lc,
214 GetOffsetVec(ctx, offset), texel);
215 } else {
216 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst,
217 texture, coords, lod_lc, texel);
218 }
219}
220
221void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
222 std::string_view coords, std::string_view dref,
223 std::string_view bias_lc, const IR::Value& offset) {
224 const auto info{inst.Flags<IR::TextureInstInfo>()};
225 const auto sparse_inst{PrepareSparse(inst)};
226 if (sparse_inst) {
227 throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples");
228 }
229 if (info.has_bias) {
230 throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples");
231 }
232 if (info.has_lod_clamp) {
233 throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples");
234 }
235 const auto texture{Texture(ctx, info, index)};
236 const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
237 const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
238 const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
239 const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod &&
240 ctx.stage != Stage::Fragment && needs_shadow_ext};
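    // Outside the fragment stage there is no implicit LOD; without GL_EXT_texture_shadow_lod,
    // emulate LOD 0 with textureGrad and zero derivatives.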
241 if (use_grad) {
242 LOG_WARNING(Shader_GLSL,
243 "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
244 if (info.type == TextureType::ColorArrayCube) {
245 LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
246 ctx.AddF32("{}=0.0f;", inst);
247 return;
248 }
249 const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
250 ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
251 d_cast, d_cast);
252 return;
253 }
254 if (!offset.IsEmpty()) {
255 const auto offset_str{GetOffsetVec(ctx, offset)};
256 if (ctx.stage == Stage::Fragment) {
257 ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref,
258 offset_str, bias);
259 } else {
260 ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords,
261 dref, offset_str);
262 }
263 } else {
264 if (ctx.stage == Stage::Fragment) {
265 if (info.type == TextureType::ColorArrayCube) {
266 ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref);
267 } else {
268 ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
269 }
270 } else {
271 ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref);
272 }
273 }
274}
275
276void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
277 std::string_view coords, std::string_view dref,
278 std::string_view lod_lc, const IR::Value& offset) {
279 const auto info{inst.Flags<IR::TextureInstInfo>()};
280 const auto sparse_inst{PrepareSparse(inst)};
281 if (sparse_inst) {
282 throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples");
283 }
284 if (info.has_bias) {
285 throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples");
286 }
287 if (info.has_lod_clamp) {
288 throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples");
289 }
290 const auto texture{Texture(ctx, info, index)};
291 const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
292 const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext};
293 const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
294 if (use_grad) {
295 LOG_WARNING(Shader_GLSL,
296 "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
297 if (info.type == TextureType::ColorArrayCube) {
298 LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
299 ctx.AddF32("{}=0.0f;", inst);
300 return;
301 }
302 const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
303 ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
304 d_cast, d_cast);
305 return;
306 }
307 if (!offset.IsEmpty()) {
308 const auto offset_str{GetOffsetVec(ctx, offset)};
309 if (info.type == TextureType::ColorArrayCube) {
310 ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc,
311 offset_str);
312 } else {
313 ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords,
314 dref, lod_lc, offset_str);
315 }
316 } else {
317 if (info.type == TextureType::ColorArrayCube) {
318 ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc);
319 } else {
320 ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref,
321 lod_lc);
322 }
323 }
324}
325
326void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
327 std::string_view coords, const IR::Value& offset, const IR::Value& offset2) {
328 const auto info{inst.Flags<IR::TextureInstInfo>()};
329 const auto texture{Texture(ctx, info, index)};
330 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
331 const auto sparse_inst{PrepareSparse(inst)};
332 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
333 if (sparse_inst && !supports_sparse) {
334 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
335 ctx.AddU1("{}=true;", *sparse_inst);
336 }
337 if (!sparse_inst || !supports_sparse) {
338 if (offset.IsEmpty()) {
339 ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,
340 info.gather_component);
341 return;
342 }
343 if (offset2.IsEmpty()) {
344 ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords,
345 GetOffsetVec(ctx, offset), info.gather_component);
346 return;
347 }
348 // PTP
349 const auto offsets{PtpOffsets(offset, offset2)};
350 ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets,
351 info.gather_component);
352 return;
353 }
354 if (offset.IsEmpty()) {
355 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));",
356 *sparse_inst, texture, coords, texel, info.gather_component);
357 return;
358 }
359 if (offset2.IsEmpty()) {
360 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
361 *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset),
362 texel, info.gather_component);
363 return;
364 }
365 // PTP
366 const auto offsets{PtpOffsets(offset, offset2)};
367 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
368 *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel,
369 info.gather_component);
370}
371
372void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
373 std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
374 std::string_view dref) {
375 const auto info{inst.Flags<IR::TextureInstInfo>()};
376 const auto texture{Texture(ctx, info, index)};
377 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
378 const auto sparse_inst{PrepareSparse(inst)};
379 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
380 if (sparse_inst && !supports_sparse) {
381 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
382 ctx.AddU1("{}=true;", *sparse_inst);
383 }
384 if (!sparse_inst || !supports_sparse) {
385 if (offset.IsEmpty()) {
386 ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
387 return;
388 }
389 if (offset2.IsEmpty()) {
390 ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref,
391 GetOffsetVec(ctx, offset));
392 return;
393 }
394 // PTP
395 const auto offsets{PtpOffsets(offset, offset2)};
396 ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets);
397 return;
398 }
399 if (offset.IsEmpty()) {
400 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst,
401 texture, coords, dref, texel);
402 return;
403 }
404 if (offset2.IsEmpty()) {
405 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));",
406 *sparse_inst, texture, CastToIntVec(coords, info), dref,
407 GetOffsetVec(ctx, offset), texel);
408 return;
409 }
410 // PTP
411 const auto offsets{PtpOffsets(offset, offset2)};
412 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));",
413 *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel);
414}
415
416void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
417 std::string_view coords, std::string_view offset, std::string_view lod,
418 [[maybe_unused]] std::string_view ms) {
419 const auto info{inst.Flags<IR::TextureInstInfo>()};
420 if (info.has_bias) {
421 throw NotImplementedException("EmitImageFetch Bias texture samples");
422 }
423 if (info.has_lod_clamp) {
424 throw NotImplementedException("EmitImageFetch Lod clamp samples");
425 }
426 const auto texture{Texture(ctx, info, index)};
427 const auto sparse_inst{PrepareSparse(inst)};
428 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
429 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
430 if (sparse_inst && !supports_sparse) {
431 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
432 ctx.AddU1("{}=true;", *sparse_inst);
433 }
434 if (!sparse_inst || !supports_sparse) {
435 if (!offset.empty()) {
436 ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture,
437 CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info));
438 } else {
439 if (info.type == TextureType::Buffer) {
440 ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
441 } else {
442 ctx.Add("{}=texelFetch({},{},int({}));", texel, texture,
443 CoordsCastToInt(coords, info), lod);
444 }
445 }
446 return;
447 }
448 if (!offset.empty()) {
449 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
450 *sparse_inst, texture, CastToIntVec(coords, info), lod,
451 CastToIntVec(offset, info), texel);
452 } else {
453 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
454 *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
455 }
456}
457
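// Packs the per-dimension extents from textureSize() (zero-padded) and textureQueryLevels() into a
// uvec4.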
458void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
459 std::string_view lod) {
460 const auto info{inst.Flags<IR::TextureInstInfo>()};
461 const auto texture{Texture(ctx, info, index)};
462 switch (info.type) {
463 case TextureType::Color1D:
464 return ctx.AddU32x4(
465 "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst,
466 texture, lod, texture);
467 case TextureType::ColorArray1D:
468 case TextureType::Color2D:
469 case TextureType::ColorCube:
470 return ctx.AddU32x4(
471 "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst,
472 texture, lod, texture);
473 case TextureType::ColorArray2D:
474 case TextureType::Color3D:
475 case TextureType::ColorArrayCube:
476 return ctx.AddU32x4(
477 "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture,
478 lod, texture);
479 case TextureType::Buffer:
480 throw NotImplementedException("EmitImageQueryDimensions Texture buffers");
481 }
482 throw LogicError("Unspecified image type {}", info.type.Value());
483}
484
485void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
486 std::string_view coords) {
487 const auto info{inst.Flags<IR::TextureInstInfo>()};
488 const auto texture{Texture(ctx, info, index)};
489 return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords);
490}
491
492void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
493 std::string_view coords, const IR::Value& derivatives,
494 const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) {
495 const auto info{inst.Flags<IR::TextureInstInfo>()};
496 if (info.has_lod_clamp) {
497 throw NotImplementedException("EmitImageGradient Lod clamp samples");
498 }
499 const auto sparse_inst{PrepareSparse(inst)};
500 if (sparse_inst) {
501 throw NotImplementedException("EmitImageGradient Sparse");
502 }
503 if (!offset.IsEmpty()) {
504 throw NotImplementedException("EmitImageGradient offset");
505 }
506 const auto texture{Texture(ctx, info, index)};
507 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
508 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
509 const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
510 if (multi_component) {
511 ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
512 derivatives_vec, derivatives_vec);
513 } else {
514 ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords,
515 derivatives_vec, derivatives_vec);
516 }
517}
518
519void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
520 std::string_view coords) {
521 const auto info{inst.Flags<IR::TextureInstInfo>()};
522 const auto sparse_inst{PrepareSparse(inst)};
523 if (sparse_inst) {
524 throw NotImplementedException("EmitImageRead Sparse");
525 }
526 const auto image{Image(ctx, info, index)};
527 ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info));
528}
529
530void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
531 std::string_view coords, std::string_view color) {
532 const auto info{inst.Flags<IR::TextureInstInfo>()};
533 const auto image{Image(ctx, info, index)};
534 ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color);
535}
536
537void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
538 std::string_view coords, std::string_view value) {
539 const auto info{inst.Flags<IR::TextureInstInfo>()};
540 const auto image{Image(ctx, info, index)};
541 ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
542}
543
544void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
545 std::string_view coords, std::string_view value) {
546 const auto info{inst.Flags<IR::TextureInstInfo>()};
547 const auto image{Image(ctx, info, index)};
548 ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
549 value);
550}
551
552void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
553 std::string_view coords, std::string_view value) {
554 const auto info{inst.Flags<IR::TextureInstInfo>()};
555 const auto image{Image(ctx, info, index)};
556 ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
557 value);
558}
559
560void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
561 std::string_view coords, std::string_view value) {
562 const auto info{inst.Flags<IR::TextureInstInfo>()};
563 const auto image{Image(ctx, info, index)};
564 ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
565 value);
566}
567
568void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
569 std::string_view coords, std::string_view value) {
570 const auto info{inst.Flags<IR::TextureInstInfo>()};
571 const auto image{Image(ctx, info, index)};
572 ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
573 value);
574}
575
576void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
577 std::string_view) {
578 NotImplemented();
579}
580
581void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
582 std::string_view) {
583 NotImplemented();
584}
585
586void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
587 std::string_view coords, std::string_view value) {
588 const auto info{inst.Flags<IR::TextureInstInfo>()};
589 const auto image{Image(ctx, info, index)};
590 ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
591}
592
593void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
594 std::string_view coords, std::string_view value) {
595 const auto info{inst.Flags<IR::TextureInstInfo>()};
596 const auto image{Image(ctx, info, index)};
597 ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
598}
599
600void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
601 std::string_view coords, std::string_view value) {
602 const auto info{inst.Flags<IR::TextureInstInfo>()};
603 const auto image{Image(ctx, info, index)};
604 ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
605}
606
607void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
608 std::string_view coords, std::string_view value) {
609 const auto info{inst.Flags<IR::TextureInstInfo>()};
610 const auto image{Image(ctx, info, index)};
611 ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info),
612 value);
613}
614
615void EmitBindlessImageSampleImplicitLod(EmitContext&) {
616 NotImplemented();
617}
618
619void EmitBindlessImageSampleExplicitLod(EmitContext&) {
620 NotImplemented();
621}
622
623void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
624 NotImplemented();
625}
626
627void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
628 NotImplemented();
629}
630
631void EmitBindlessImageGather(EmitContext&) {
632 NotImplemented();
633}
634
635void EmitBindlessImageGatherDref(EmitContext&) {
636 NotImplemented();
637}
638
639void EmitBindlessImageFetch(EmitContext&) {
640 NotImplemented();
641}
642
643void EmitBindlessImageQueryDimensions(EmitContext&) {
644 NotImplemented();
645}
646
647void EmitBindlessImageQueryLod(EmitContext&) {
648 NotImplemented();
649}
650
651void EmitBindlessImageGradient(EmitContext&) {
652 NotImplemented();
653}
654
655void EmitBindlessImageRead(EmitContext&) {
656 NotImplemented();
657}
658
659void EmitBindlessImageWrite(EmitContext&) {
660 NotImplemented();
661}
662
663void EmitBoundImageSampleImplicitLod(EmitContext&) {
664 NotImplemented();
665}
666
667void EmitBoundImageSampleExplicitLod(EmitContext&) {
668 NotImplemented();
669}
670
671void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
672 NotImplemented();
673}
674
675void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
676 NotImplemented();
677}
678
679void EmitBoundImageGather(EmitContext&) {
680 NotImplemented();
681}
682
683void EmitBoundImageGatherDref(EmitContext&) {
684 NotImplemented();
685}
686
687void EmitBoundImageFetch(EmitContext&) {
688 NotImplemented();
689}
690
691void EmitBoundImageQueryDimensions(EmitContext&) {
692 NotImplemented();
693}
694
695void EmitBoundImageQueryLod(EmitContext&) {
696 NotImplemented();
697}
698
699void EmitBoundImageGradient(EmitContext&) {
700 NotImplemented();
701}
702
703void EmitBoundImageRead(EmitContext&) {
704 NotImplemented();
705}
706
707void EmitBoundImageWrite(EmitContext&) {
708 NotImplemented();
709}
710
711void EmitBindlessImageAtomicIAdd32(EmitContext&) {
712 NotImplemented();
713}
714
715void EmitBindlessImageAtomicSMin32(EmitContext&) {
716 NotImplemented();
717}
718
719void EmitBindlessImageAtomicUMin32(EmitContext&) {
720 NotImplemented();
721}
722
723void EmitBindlessImageAtomicSMax32(EmitContext&) {
724 NotImplemented();
725}
726
727void EmitBindlessImageAtomicUMax32(EmitContext&) {
728 NotImplemented();
729}
730
731void EmitBindlessImageAtomicInc32(EmitContext&) {
732 NotImplemented();
733}
734
735void EmitBindlessImageAtomicDec32(EmitContext&) {
736 NotImplemented();
737}
738
739void EmitBindlessImageAtomicAnd32(EmitContext&) {
740 NotImplemented();
741}
742
743void EmitBindlessImageAtomicOr32(EmitContext&) {
744 NotImplemented();
745}
746
747void EmitBindlessImageAtomicXor32(EmitContext&) {
748 NotImplemented();
749}
750
751void EmitBindlessImageAtomicExchange32(EmitContext&) {
752 NotImplemented();
753}
754
755void EmitBoundImageAtomicIAdd32(EmitContext&) {
756 NotImplemented();
757}
758
759void EmitBoundImageAtomicSMin32(EmitContext&) {
760 NotImplemented();
761}
762
763void EmitBoundImageAtomicUMin32(EmitContext&) {
764 NotImplemented();
765}
766
767void EmitBoundImageAtomicSMax32(EmitContext&) {
768 NotImplemented();
769}
770
771void EmitBoundImageAtomicUMax32(EmitContext&) {
772 NotImplemented();
773}
774
775void EmitBoundImageAtomicInc32(EmitContext&) {
776 NotImplemented();
777}
778
779void EmitBoundImageAtomicDec32(EmitContext&) {
780 NotImplemented();
781}
782
783void EmitBoundImageAtomicAnd32(EmitContext&) {
784 NotImplemented();
785}
786
787void EmitBoundImageAtomicOr32(EmitContext&) {
788 NotImplemented();
789}
790
791void EmitBoundImageAtomicXor32(EmitContext&) {
792 NotImplemented();
793}
794
795void EmitBoundImageAtomicExchange32(EmitContext&) {
796 NotImplemented();
797}
798
799} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
new file mode 100644
index 000000000..5936d086f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -0,0 +1,702 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string_view>
8
9#include "common/common_types.h"
10
11namespace Shader::IR {
12enum class Attribute : u64;
13enum class Patch : u64;
14class Inst;
15class Value;
16} // namespace Shader::IR
17
18namespace Shader::Backend::GLSL {
19class EmitContext;
20
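// Throws for GLSL emitters that are not implemented yet; __func__ names the offending instruction.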
21#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__)
22
23// Microinstruction emitters
24void EmitPhi(EmitContext& ctx, IR::Inst& inst);
25void EmitVoid(EmitContext& ctx);
26void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
27void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
28void EmitReference(EmitContext& ctx, const IR::Value& value);
29void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
30void EmitJoin(EmitContext& ctx);
31void EmitDemoteToHelperInvocation(EmitContext& ctx);
32void EmitBarrier(EmitContext& ctx);
33void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
34void EmitDeviceMemoryBarrier(EmitContext& ctx);
35void EmitPrologue(EmitContext& ctx);
36void EmitEpilogue(EmitContext& ctx);
37void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
38void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
39void EmitGetRegister(EmitContext& ctx);
40void EmitSetRegister(EmitContext& ctx);
41void EmitGetPred(EmitContext& ctx);
42void EmitSetPred(EmitContext& ctx);
43void EmitSetGotoVariable(EmitContext& ctx);
44void EmitGetGotoVariable(EmitContext& ctx);
45void EmitSetIndirectBranchVariable(EmitContext& ctx);
46void EmitGetIndirectBranchVariable(EmitContext& ctx);
47void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
48 const IR::Value& offset);
49void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
50 const IR::Value& offset);
51void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
52 const IR::Value& offset);
53void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
54 const IR::Value& offset);
55void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
56 const IR::Value& offset);
57void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
58 const IR::Value& offset);
59void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
60 const IR::Value& offset);
61void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
62 std::string_view vertex);
63void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
64 std::string_view vertex);
65void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
66 std::string_view vertex);
67void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value,
68 std::string_view vertex);
69void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
70void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value);
71void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value);
72void EmitSetSampleMask(EmitContext& ctx, std::string_view value);
73void EmitSetFragDepth(EmitContext& ctx, std::string_view value);
74void EmitGetZFlag(EmitContext& ctx);
75void EmitGetSFlag(EmitContext& ctx);
76void EmitGetCFlag(EmitContext& ctx);
77void EmitGetOFlag(EmitContext& ctx);
78void EmitSetZFlag(EmitContext& ctx);
79void EmitSetSFlag(EmitContext& ctx);
80void EmitSetCFlag(EmitContext& ctx);
81void EmitSetOFlag(EmitContext& ctx);
82void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
83void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
84void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
85void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
86void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
87void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
88void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
89void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
90void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
91void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
92void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
93void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
94void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
95void EmitLoadGlobalU8(EmitContext& ctx);
96void EmitLoadGlobalS8(EmitContext& ctx);
97void EmitLoadGlobalU16(EmitContext& ctx);
98void EmitLoadGlobalS16(EmitContext& ctx);
99void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address);
100void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address);
101void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address);
102void EmitWriteGlobalU8(EmitContext& ctx);
103void EmitWriteGlobalS8(EmitContext& ctx);
104void EmitWriteGlobalU16(EmitContext& ctx);
105void EmitWriteGlobalS16(EmitContext& ctx);
106void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value);
107void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value);
108void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value);
109void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
110 const IR::Value& offset);
111void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
112 const IR::Value& offset);
113void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
114 const IR::Value& offset);
115void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
116 const IR::Value& offset);
117void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
118 const IR::Value& offset);
119void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
120 const IR::Value& offset);
121void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
122 const IR::Value& offset);
123void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
124 std::string_view value);
125void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
126 std::string_view value);
127void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
128 std::string_view value);
129void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
130 std::string_view value);
131void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
132 std::string_view value);
133void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
134 std::string_view value);
135void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
136 std::string_view value);
137void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
138void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
139void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
140void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
141void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
142void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
143void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
144void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value);
145void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value);
146void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value);
147void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value);
148void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value);
149void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
150 std::string_view e2);
151void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
152 std::string_view e2, std::string_view e3);
153void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
154 std::string_view e2, std::string_view e3, std::string_view e4);
155void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
156 u32 index);
157void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
158 u32 index);
159void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
160 u32 index);
161void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
162 std::string_view object, u32 index);
163void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
164 std::string_view object, u32 index);
165void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
166 std::string_view object, u32 index);
167void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2);
168void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
169 std::string_view e3);
170void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
171 std::string_view e3, std::string_view e4);
172void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index);
173void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index);
174void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index);
175void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object,
176 u32 index);
177void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object,
178 u32 index);
179void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object,
180 u32 index);
181void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
182 std::string_view e2);
183void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
184 std::string_view e2, std::string_view e3);
185void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
186 std::string_view e2, std::string_view e3, std::string_view e4);
187void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
188 u32 index);
189void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
190 u32 index);
191void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
192 u32 index);
193void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
194 std::string_view object, u32 index);
195void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
196 std::string_view object, u32 index);
197void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
198 std::string_view object, u32 index);
199void EmitCompositeConstructF64x2(EmitContext& ctx);
200void EmitCompositeConstructF64x3(EmitContext& ctx);
201void EmitCompositeConstructF64x4(EmitContext& ctx);
202void EmitCompositeExtractF64x2(EmitContext& ctx);
203void EmitCompositeExtractF64x3(EmitContext& ctx);
204void EmitCompositeExtractF64x4(EmitContext& ctx);
205void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
206 u32 index);
207void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
208 u32 index);
209void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
210 u32 index);
211void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
212 std::string_view true_value, std::string_view false_value);
213void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value,
214 std::string_view false_value);
215void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
216 std::string_view false_value);
217void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
218 std::string_view true_value, std::string_view false_value);
219void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
220 std::string_view true_value, std::string_view false_value);
221void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
222 std::string_view false_value);
223void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
224 std::string_view true_value, std::string_view false_value);
225void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
226 std::string_view true_value, std::string_view false_value);
227void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);
228void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
229void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
230void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);
231void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
232void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
233void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
234void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
235void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
236void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
237void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
238void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
239void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
240void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
241void EmitGetZeroFromOp(EmitContext& ctx);
242void EmitGetSignFromOp(EmitContext& ctx);
243void EmitGetCarryFromOp(EmitContext& ctx);
244void EmitGetOverflowFromOp(EmitContext& ctx);
245void EmitGetSparseFromOp(EmitContext& ctx);
246void EmitGetInBoundsFromOp(EmitContext& ctx);
247void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
248void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
249void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
250void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
251void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
252void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
253void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
254 std::string_view c);
255void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
256 std::string_view c);
257void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
258 std::string_view c);
259void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
260void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
261void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
262void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
263void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
264void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
265void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
266void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
267void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
268void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
269void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value);
270void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value);
271void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
272void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
273void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
274void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
275void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
276void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
277void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value);
278void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
279void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
280void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
281void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value,
282 std::string_view min_value, std::string_view max_value);
283void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
284 std::string_view min_value, std::string_view max_value);
285void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
286 std::string_view min_value, std::string_view max_value);
287void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
288void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
289void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
290void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
291void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
292void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
293void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
294void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
295void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
296void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
297void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
298void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
299void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
300void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
301void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
302void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
303void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
304 std::string_view rhs);
305void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
306 std::string_view rhs);
307void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
308void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
309 std::string_view rhs);
310void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
311 std::string_view rhs);
312void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
313void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
314 std::string_view rhs);
315void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
316 std::string_view rhs);
317void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
318void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
319 std::string_view rhs);
320void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
321 std::string_view rhs);
322void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
323void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
324 std::string_view rhs);
325void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
326 std::string_view rhs);
327void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
328void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
329 std::string_view rhs);
330void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
331 std::string_view rhs);
332void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
333void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
334 std::string_view rhs);
335void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
336 std::string_view rhs);
337void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
338void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
339 std::string_view rhs);
340void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
341 std::string_view rhs);
342void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
343void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
344 std::string_view rhs);
345void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
346 std::string_view rhs);
347void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
348void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
349 std::string_view rhs);
350void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
351 std::string_view rhs);
352void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
353void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
354 std::string_view rhs);
355void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
356 std::string_view rhs);
357void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
358void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
359void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
360void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
361void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
362void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
363void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
364void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
365void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
366void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
367void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
368void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
369 std::string_view shift);
370void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
371 std::string_view shift);
372void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
373 std::string_view shift);
374void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
375 std::string_view shift);
376void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
377 std::string_view shift);
378void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
379 std::string_view shift);
380void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
381void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
382void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
383void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
384 std::string_view insert, std::string_view offset, std::string_view count);
385void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
386 std::string_view offset, std::string_view count);
387void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
388 std::string_view offset, std::string_view count);
389void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
390void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
391void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
392void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
393void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
394void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
395void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
396void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
397void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
398void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
399 std::string_view max);
400void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
401 std::string_view max);
402void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
403void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
404void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
405void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
406 std::string_view rhs);
407void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
408 std::string_view rhs);
409void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
410void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
411void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
412void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
413 std::string_view rhs);
414void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
415 std::string_view rhs);
416void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
417 std::string_view value);
418void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
419 std::string_view value);
420void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
421 std::string_view value);
422void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
423 std::string_view value);
424void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
425 std::string_view value);
426void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
427 std::string_view value);
428void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
429 std::string_view value);
430void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
431 std::string_view value);
432void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
433 std::string_view value);
434void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
435 std::string_view value);
436void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
437 std::string_view value);
438void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
439 std::string_view value);
440void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
441 const IR::Value& offset, std::string_view value);
442void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
443 const IR::Value& offset, std::string_view value);
444void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
445 const IR::Value& offset, std::string_view value);
446void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
447 const IR::Value& offset, std::string_view value);
448void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
449 const IR::Value& offset, std::string_view value);
450void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
451 const IR::Value& offset, std::string_view value);
452void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
453 const IR::Value& offset, std::string_view value);
454void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
455 const IR::Value& offset, std::string_view value);
456void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
457 const IR::Value& offset, std::string_view value);
458void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
459 const IR::Value& offset, std::string_view value);
460void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
461 const IR::Value& offset, std::string_view value);
462void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
463 const IR::Value& offset, std::string_view value);
464void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
465 const IR::Value& offset, std::string_view value);
466void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
467 const IR::Value& offset, std::string_view value);
468void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
469 const IR::Value& offset, std::string_view value);
470void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
471 const IR::Value& offset, std::string_view value);
472void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
473 const IR::Value& offset, std::string_view value);
474void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
475 const IR::Value& offset, std::string_view value);
476void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
477 const IR::Value& offset, std::string_view value);
478void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
479 const IR::Value& offset, std::string_view value);
480void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
481 const IR::Value& offset, std::string_view value);
482void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
483 const IR::Value& offset, std::string_view value);
484void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
485 const IR::Value& offset, std::string_view value);
486void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
487 const IR::Value& offset, std::string_view value);
488void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
489 const IR::Value& offset, std::string_view value);
490void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
491 const IR::Value& offset, std::string_view value);
492void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
493 const IR::Value& offset, std::string_view value);
494void EmitGlobalAtomicIAdd32(EmitContext& ctx);
495void EmitGlobalAtomicSMin32(EmitContext& ctx);
496void EmitGlobalAtomicUMin32(EmitContext& ctx);
497void EmitGlobalAtomicSMax32(EmitContext& ctx);
498void EmitGlobalAtomicUMax32(EmitContext& ctx);
499void EmitGlobalAtomicInc32(EmitContext& ctx);
500void EmitGlobalAtomicDec32(EmitContext& ctx);
501void EmitGlobalAtomicAnd32(EmitContext& ctx);
502void EmitGlobalAtomicOr32(EmitContext& ctx);
503void EmitGlobalAtomicXor32(EmitContext& ctx);
504void EmitGlobalAtomicExchange32(EmitContext& ctx);
505void EmitGlobalAtomicIAdd64(EmitContext& ctx);
506void EmitGlobalAtomicSMin64(EmitContext& ctx);
507void EmitGlobalAtomicUMin64(EmitContext& ctx);
508void EmitGlobalAtomicSMax64(EmitContext& ctx);
509void EmitGlobalAtomicUMax64(EmitContext& ctx);
510void EmitGlobalAtomicInc64(EmitContext& ctx);
511void EmitGlobalAtomicDec64(EmitContext& ctx);
512void EmitGlobalAtomicAnd64(EmitContext& ctx);
513void EmitGlobalAtomicOr64(EmitContext& ctx);
514void EmitGlobalAtomicXor64(EmitContext& ctx);
515void EmitGlobalAtomicExchange64(EmitContext& ctx);
516void EmitGlobalAtomicAddF32(EmitContext& ctx);
517void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
518void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
519void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
520void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
521void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
522void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
523void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
524void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
525void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
526void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value);
527void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
528void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
529void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
530void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
531void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
532void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
533void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
534void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
535void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
536void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
537void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
538void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
539void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
540void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
541void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
542void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
543void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
544void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
545void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
546void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
547void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
548void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
549void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
550void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
551void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
552void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
553void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
554void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
555void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
556void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
557void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
558void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
559void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
560void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
561void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
562void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
563void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
564void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
565void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
566void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
567void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
568void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
569void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
570void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
571void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
572void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
573void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
574void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
575void EmitBindlessImageSampleImplicitLod(EmitContext&);
576void EmitBindlessImageSampleExplicitLod(EmitContext&);
577void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
578void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
579void EmitBindlessImageGather(EmitContext&);
580void EmitBindlessImageGatherDref(EmitContext&);
581void EmitBindlessImageFetch(EmitContext&);
582void EmitBindlessImageQueryDimensions(EmitContext&);
583void EmitBindlessImageQueryLod(EmitContext&);
584void EmitBindlessImageGradient(EmitContext&);
585void EmitBindlessImageRead(EmitContext&);
586void EmitBindlessImageWrite(EmitContext&);
587void EmitBoundImageSampleImplicitLod(EmitContext&);
588void EmitBoundImageSampleExplicitLod(EmitContext&);
589void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
590void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
591void EmitBoundImageGather(EmitContext&);
592void EmitBoundImageGatherDref(EmitContext&);
593void EmitBoundImageFetch(EmitContext&);
594void EmitBoundImageQueryDimensions(EmitContext&);
595void EmitBoundImageQueryLod(EmitContext&);
596void EmitBoundImageGradient(EmitContext&);
597void EmitBoundImageRead(EmitContext&);
598void EmitBoundImageWrite(EmitContext&);
599void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
600 std::string_view coords, std::string_view bias_lc,
601 const IR::Value& offset);
602void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
603 std::string_view coords, std::string_view lod_lc,
604 const IR::Value& offset);
605void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
606 std::string_view coords, std::string_view dref,
607 std::string_view bias_lc, const IR::Value& offset);
608void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
609 std::string_view coords, std::string_view dref,
610 std::string_view lod_lc, const IR::Value& offset);
611void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
612 std::string_view coords, const IR::Value& offset, const IR::Value& offset2);
613void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
614 std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
615 std::string_view dref);
616void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
617 std::string_view coords, std::string_view offset, std::string_view lod,
618 std::string_view ms);
619void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
620 std::string_view lod);
621void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
622 std::string_view coords);
623void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
624 std::string_view coords, const IR::Value& derivatives,
625 const IR::Value& offset, const IR::Value& lod_clamp);
626void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
627 std::string_view coords);
628void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
629 std::string_view coords, std::string_view color);
630void EmitBindlessImageAtomicIAdd32(EmitContext&);
631void EmitBindlessImageAtomicSMin32(EmitContext&);
632void EmitBindlessImageAtomicUMin32(EmitContext&);
633void EmitBindlessImageAtomicSMax32(EmitContext&);
634void EmitBindlessImageAtomicUMax32(EmitContext&);
635void EmitBindlessImageAtomicInc32(EmitContext&);
636void EmitBindlessImageAtomicDec32(EmitContext&);
637void EmitBindlessImageAtomicAnd32(EmitContext&);
638void EmitBindlessImageAtomicOr32(EmitContext&);
639void EmitBindlessImageAtomicXor32(EmitContext&);
640void EmitBindlessImageAtomicExchange32(EmitContext&);
641void EmitBoundImageAtomicIAdd32(EmitContext&);
642void EmitBoundImageAtomicSMin32(EmitContext&);
643void EmitBoundImageAtomicUMin32(EmitContext&);
644void EmitBoundImageAtomicSMax32(EmitContext&);
645void EmitBoundImageAtomicUMax32(EmitContext&);
646void EmitBoundImageAtomicInc32(EmitContext&);
647void EmitBoundImageAtomicDec32(EmitContext&);
648void EmitBoundImageAtomicAnd32(EmitContext&);
649void EmitBoundImageAtomicOr32(EmitContext&);
650void EmitBoundImageAtomicXor32(EmitContext&);
651void EmitBoundImageAtomicExchange32(EmitContext&);
652void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
653 std::string_view coords, std::string_view value);
654void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
655 std::string_view coords, std::string_view value);
656void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
657 std::string_view coords, std::string_view value);
658void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
659 std::string_view coords, std::string_view value);
660void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
661 std::string_view coords, std::string_view value);
662void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
663 std::string_view coords, std::string_view value);
664void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
665 std::string_view coords, std::string_view value);
666void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
667 std::string_view coords, std::string_view value);
668void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
669 std::string_view coords, std::string_view value);
670void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
671 std::string_view coords, std::string_view value);
672void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
673 std::string_view coords, std::string_view value);
674void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
675void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
676void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
677void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
678void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
679void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
680void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
681void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
682void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
683void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
684void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
685 std::string_view index, std::string_view clamp,
686 std::string_view segmentation_mask);
687void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
688 std::string_view clamp, std::string_view segmentation_mask);
689void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
690 std::string_view index, std::string_view clamp,
691 std::string_view segmentation_mask);
692void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
693 std::string_view index, std::string_view clamp,
694 std::string_view segmentation_mask);
695void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
696 std::string_view swizzle);
697void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
698void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
699void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
700void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
701
702} // namespace Shader::Backend::GLSL
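Each declaration in this header corresponds to one IR opcode; the GLSL backend walks every instruction in a block and forwards its unwrapped operands to the matching Emit* function. A minimal sketch of that dispatch, assuming a plain switch over IR::Opcode with hand-written operand unwrapping (the real emit_glsl.cpp may generate this from the opcode list instead; EmitInst and the argument handling here are illustrative only):

    // Hypothetical dispatcher sketch; only two opcodes shown, error handling omitted.
    void EmitInst(EmitContext& ctx, IR::Inst* inst) {
        switch (inst->GetOpcode()) {
        case IR::Opcode::IAdd32:
            return EmitIAdd32(ctx, *inst, ctx.var_alloc.Consume(inst->Arg(0)),
                              ctx.var_alloc.Consume(inst->Arg(1)));
        case IR::Opcode::LogicalNot:
            return EmitLogicalNot(ctx, *inst, ctx.var_alloc.Consume(inst->Arg(0)));
        default:
            break; // Remaining opcodes elided in this sketch.
        }
    }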
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
new file mode 100644
index 000000000..38419f88f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
14 IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
15 if (!zero) {
16 return;
17 }
18 ctx.AddU1("{}={}==0;", *zero, result);
19 zero->Invalidate();
20}
21
22void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
23 IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
24 if (!sign) {
25 return;
26 }
27 ctx.AddU1("{}=int({})<0;", *sign, result);
28 sign->Invalidate();
29}
30
31void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
32 char lop) {
33 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
34 ctx.Add("{}={}{}{};", result, a, lop, b);
35 SetZeroFlag(ctx, inst, result);
36 SetSignFlag(ctx, inst, result);
37}
38} // Anonymous namespace
39
40void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
41 // Compute the overflow CC first as it requires the original operand values,
42 // which may be overwritten by the result of the addition
43 if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
44 // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
45 constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
46 const auto sub_a{fmt::format("{}u-{}", s32_max, a)};
47 const auto positive_result{fmt::format("int({})>int({})", b, sub_a)};
48 const auto negative_result{fmt::format("int({})<int({})", b, sub_a)};
49 ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result);
50 overflow->Invalidate();
51 }
52 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
53 if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
54 ctx.uses_cc_carry = true;
55 ctx.Add("{}=uaddCarry({},{},carry);", result, a, b);
56 ctx.AddU1("{}=carry!=0;", *carry);
57 carry->Invalidate();
58 } else {
59 ctx.Add("{}={}+{};", result, a, b);
60 }
61 SetZeroFlag(ctx, inst, result);
62 SetSignFlag(ctx, inst, result);
63}
64
65void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
66 ctx.AddU64("{}={}+{};", inst, a, b);
67}
68
69void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
70 ctx.AddU32("{}={}-{};", inst, a, b);
71}
72
73void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
74 ctx.AddU64("{}={}-{};", inst, a, b);
75}
76
77void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
78 ctx.AddU32("{}=uint({}*{});", inst, a, b);
79}
80
81void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
82 ctx.AddU32("{}=uint(-({}));", inst, value);
83}
84
85void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
86 ctx.AddU64("{}=-({});", inst, value);
87}
88
89void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
90 ctx.AddU32("{}=abs(int({}));", inst, value);
91}
92
93void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
94 std::string_view shift) {
95 ctx.AddU32("{}={}<<{};", inst, base, shift);
96}
97
98void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
99 std::string_view shift) {
100 ctx.AddU64("{}={}<<{};", inst, base, shift);
101}
102
103void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
104 std::string_view shift) {
105 ctx.AddU32("{}={}>>{};", inst, base, shift);
106}
107
108void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
109 std::string_view shift) {
110 ctx.AddU64("{}={}>>{};", inst, base, shift);
111}
112
113void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
114 std::string_view shift) {
115 ctx.AddU32("{}=int({})>>{};", inst, base, shift);
116}
117
118void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
119 std::string_view shift) {
120 ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift);
121}
122
123void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
124 BitwiseLogicalOp(ctx, inst, a, b, '&');
125}
126
127void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
128 BitwiseLogicalOp(ctx, inst, a, b, '|');
129}
130
131void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
132 BitwiseLogicalOp(ctx, inst, a, b, '^');
133}
134
135void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
136 std::string_view insert, std::string_view offset, std::string_view count) {
137 ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count);
138}
139
140void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
141 std::string_view offset, std::string_view count) {
142 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
143 ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count);
144 SetZeroFlag(ctx, inst, result);
145 SetSignFlag(ctx, inst, result);
146}
147
148void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
149 std::string_view offset, std::string_view count) {
150 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
151 ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count);
152 SetZeroFlag(ctx, inst, result);
153 SetSignFlag(ctx, inst, result);
154}
155
156void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
157 ctx.AddU32("{}=bitfieldReverse({});", inst, value);
158}
159
160void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
161 ctx.AddU32("{}=bitCount({});", inst, value);
162}
163
164void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
165 ctx.AddU32("{}=~{};", inst, value);
166}
167
168void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
169 ctx.AddU32("{}=findMSB(int({}));", inst, value);
170}
171
172void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
173 ctx.AddU32("{}=findMSB(uint({}));", inst, value);
174}
175
176void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
177 ctx.AddU32("{}=min(int({}),int({}));", inst, a, b);
178}
179
180void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
181 ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b);
182}
183
184void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
185 ctx.AddU32("{}=max(int({}),int({}));", inst, a, b);
186}
187
188void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
189 ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b);
190}
191
192void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
193 std::string_view max) {
194 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
195 ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max);
196 SetZeroFlag(ctx, inst, result);
197 SetSignFlag(ctx, inst, result);
198}
199
200void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
201 std::string_view max) {
202 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
203 ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max);
204 SetZeroFlag(ctx, inst, result);
205 SetSignFlag(ctx, inst, result);
206}
207
208void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
209 ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs);
210}
211
212void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
213 ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs);
214}
215
216void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
217 ctx.AddU1("{}={}=={};", inst, lhs, rhs);
218}
219
220void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
221 std::string_view rhs) {
222 ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs);
223}
224
225void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
226 std::string_view rhs) {
227 ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs);
228}
229
230void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
231 std::string_view rhs) {
232 ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs);
233}
234
235void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
236 std::string_view rhs) {
237 ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs);
238}
239
240void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
241 ctx.AddU1("{}={}!={};", inst, lhs, rhs);
242}
243
244void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
245 std::string_view rhs) {
246 ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs);
247}
248
249void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
250 std::string_view rhs) {
251 ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs);
252}
253} // namespace Shader::Backend::GLSL
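EmitIAdd32 above only materializes condition codes when a pseudo-op consumes them: the overflow check compares b against INT_MAX-a (choosing the greater-than or less-than form from the sign of a) before the sum can clobber the operands, the carry path switches the addition to uaddCarry, and the zero/sign flags are derived from the stored result. A sketch that reproduces the strings appended for an IAdd32 whose carry, zero and sign flags are all consumed; the variable names (u32_*, b1_*) and the standalone use of fmt are assumptions for the example, not the allocator's real output:

    // Hypothetical reconstruction of the GLSL text emitted by EmitIAdd32 when the
    // carry, zero and sign pseudo-ops are present. All names are invented.
    #include <string>
    #include <fmt/format.h>

    std::string ExampleIAdd32WithCarry() {
        const std::string result{"u32_5"};
        const std::string a{"u32_1"};
        const std::string b{"u32_2"};
        std::string code;
        code += fmt::format("{}=uaddCarry({},{},carry);", result, a, b); // carry path
        code += fmt::format("{}=carry!=0;", "b1_0");                     // GetCarryFromOp
        code += fmt::format("{}={}==0;", "b1_1", result);                // SetZeroFlag
        code += fmt::format("{}=int({})<0;", "b1_2", result);            // SetSignFlag
        return code;
    }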
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
new file mode 100644
index 000000000..338ff4bd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12
13void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
14 ctx.AddU1("{}={}||{};", inst, a, b);
15}
16
17void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
18 ctx.AddU1("{}={}&&{};", inst, a, b);
19}
20
21void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
22 ctx.AddU1("{}={}^^{};", inst, a, b);
23}
24
25void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
26 ctx.AddU1("{}=!{};", inst, value);
27}
28} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
new file mode 100644
index 000000000..e3957491f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -0,0 +1,202 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
14constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
15 "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
16 "if(cas_result==old_value){{break;}}}}"};
17
18void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var,
19 std::string_view value, std::string_view bit_offset, u32 num_bits) {
20 const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)};
21 ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits);
22}
23} // Anonymous namespace
24
25void EmitLoadGlobalU8(EmitContext&) {
26 NotImplemented();
27}
28
29void EmitLoadGlobalS8(EmitContext&) {
30 NotImplemented();
31}
32
33void EmitLoadGlobalU16(EmitContext&) {
34 NotImplemented();
35}
36
37void EmitLoadGlobalS16(EmitContext&) {
38 NotImplemented();
39}
40
41void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
42 if (ctx.profile.support_int64) {
43 return ctx.AddU32("{}=LoadGlobal32({});", inst, address);
44 }
45 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
46 ctx.AddU32("{}=0u;", inst);
47}
48
49void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
50 if (ctx.profile.support_int64) {
51 return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address);
52 }
53 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
54 ctx.AddU32x2("{}=uvec2(0);", inst);
55}
56
57void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
58 if (ctx.profile.support_int64) {
59 return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address);
60 }
61 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
62 ctx.AddU32x4("{}=uvec4(0);", inst);
63}
64
65void EmitWriteGlobalU8(EmitContext&) {
66 NotImplemented();
67}
68
69void EmitWriteGlobalS8(EmitContext&) {
70 NotImplemented();
71}
72
73void EmitWriteGlobalU16(EmitContext&) {
74 NotImplemented();
75}
76
77void EmitWriteGlobalS16(EmitContext&) {
78 NotImplemented();
79}
80
81void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
82 if (ctx.profile.support_int64) {
83 return ctx.Add("WriteGlobal32({},{});", address, value);
84 }
85 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
86}
87
88void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
89 if (ctx.profile.support_int64) {
90 return ctx.Add("WriteGlobal64({},{});", address, value);
91 }
92 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
93}
94
95void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
96 if (ctx.profile.support_int64) {
97 return ctx.Add("WriteGlobal128({},{});", address, value);
98 }
99 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
100}
101
102void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
103 const IR::Value& offset) {
104 const auto offset_var{ctx.var_alloc.Consume(offset)};
105 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
106 binding.U32(), offset_var, offset_var);
107}
108
109void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
110 const IR::Value& offset) {
111 const auto offset_var{ctx.var_alloc.Consume(offset)};
112 ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
113 binding.U32(), offset_var, offset_var);
114}
115
116void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
117 const IR::Value& offset) {
118 const auto offset_var{ctx.var_alloc.Consume(offset)};
119 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
120 binding.U32(), offset_var, offset_var);
121}
122
123void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
124 const IR::Value& offset) {
125 const auto offset_var{ctx.var_alloc.Consume(offset)};
126 ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
127 ctx.stage_name, binding.U32(), offset_var, offset_var);
128}
129
130void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
131 const IR::Value& offset) {
132 const auto offset_var{ctx.var_alloc.Consume(offset)};
133 ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
134}
135
136void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
137 const IR::Value& offset) {
138 const auto offset_var{ctx.var_alloc.Consume(offset)};
139 ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
140 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
141}
142
143void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
144 const IR::Value& offset) {
145 const auto offset_var{ctx.var_alloc.Consume(offset)};
146 ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
147 "+12)>>2]);",
148 inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
149 offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
150 binding.U32(), offset_var);
151}
152
153void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
154 std::string_view value) {
155 const auto offset_var{ctx.var_alloc.Consume(offset)};
156 const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
157 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
158}
159
160void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
161 std::string_view value) {
162 const auto offset_var{ctx.var_alloc.Consume(offset)};
163 const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
164 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
165}
166
167void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
168 std::string_view value) {
169 const auto offset_var{ctx.var_alloc.Consume(offset)};
170 const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
171 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
172}
173
174void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
175 std::string_view value) {
176 const auto offset_var{ctx.var_alloc.Consume(offset)};
177 const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
178 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
179}
180
181void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
182 std::string_view value) {
183 const auto offset_var{ctx.var_alloc.Consume(offset)};
184 ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
185}
186
187void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
188 std::string_view value) {
189 const auto offset_var{ctx.var_alloc.Consume(offset)};
190 ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
191 ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
192}
193
194void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
195 std::string_view value) {
196 const auto offset_var{ctx.var_alloc.Consume(offset)};
197 ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
198 ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
199 ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
200 ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
201}
202} // namespace Shader::Backend::GLSL
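
For orientation, the loads above treat each SSBO as an array of 32-bit words: the word index is the byte offset shifted right by two, and the 8/16-bit variants carve out the right slice with bitfieldExtract, while the matching sub-word writes go through the SsboWriteCas helper so neighbouring bytes survive. A stand-alone C++ sketch of the same index and bit-offset arithmetic, using an arbitrary byte offset of 10 (not a value from the patch):

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t offset = 10;                       // arbitrary byte offset
    const std::uint32_t word = offset >> 2;                // ssbo[word] holds the 32-bit word
    const std::uint32_t bit8 = (offset % 4) * 8;           // bit offset used by the U8/S8 paths
    const std::uint32_t bit16 = ((offset >> 1) % 2) * 16;  // bit offset used by the U16/S16 paths
    std::printf("word=%u bit8=%u bit16=%u\n", word, bit8, bit16); // word=2 bit8=16 bit16=16
    return 0;
}
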
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
new file mode 100644
index 000000000..f420fe388
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11#ifdef _MSC_VER
12#pragma warning(disable : 4100)
13#endif
14
15namespace Shader::Backend::GLSL {
16
17void EmitGetRegister(EmitContext& ctx) {
18 NotImplemented();
19}
20
21void EmitSetRegister(EmitContext& ctx) {
22 NotImplemented();
23}
24
25void EmitGetPred(EmitContext& ctx) {
26 NotImplemented();
27}
28
29void EmitSetPred(EmitContext& ctx) {
30 NotImplemented();
31}
32
33void EmitSetGotoVariable(EmitContext& ctx) {
34 NotImplemented();
35}
36
37void EmitGetGotoVariable(EmitContext& ctx) {
38 NotImplemented();
39}
40
41void EmitSetIndirectBranchVariable(EmitContext& ctx) {
42 NotImplemented();
43}
44
45void EmitGetIndirectBranchVariable(EmitContext& ctx) {
46 NotImplemented();
47}
48
49void EmitGetZFlag(EmitContext& ctx) {
50 NotImplemented();
51}
52
53void EmitGetSFlag(EmitContext& ctx) {
54 NotImplemented();
55}
56
57void EmitGetCFlag(EmitContext& ctx) {
58 NotImplemented();
59}
60
61void EmitGetOFlag(EmitContext& ctx) {
62 NotImplemented();
63}
64
65void EmitSetZFlag(EmitContext& ctx) {
66 NotImplemented();
67}
68
69void EmitSetSFlag(EmitContext& ctx) {
70 NotImplemented();
71}
72
73void EmitSetCFlag(EmitContext& ctx) {
74 NotImplemented();
75}
76
77void EmitSetOFlag(EmitContext& ctx) {
78 NotImplemented();
79}
80
81void EmitGetZeroFromOp(EmitContext& ctx) {
82 NotImplemented();
83}
84
85void EmitGetSignFromOp(EmitContext& ctx) {
86 NotImplemented();
87}
88
89void EmitGetCarryFromOp(EmitContext& ctx) {
90 NotImplemented();
91}
92
93void EmitGetOverflowFromOp(EmitContext& ctx) {
94 NotImplemented();
95}
96
97void EmitGetSparseFromOp(EmitContext& ctx) {
98 NotImplemented();
99}
100
101void EmitGetInBoundsFromOp(EmitContext& ctx) {
102 NotImplemented();
103}
104
105} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
new file mode 100644
index 000000000..49fba9073
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
13 std::string_view true_value, std::string_view false_value) {
14 ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value);
15}
16
17void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
18 [[maybe_unused]] std::string_view true_value,
19 [[maybe_unused]] std::string_view false_value) {
20 NotImplemented();
21}
22
23void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
24 [[maybe_unused]] std::string_view true_value,
25 [[maybe_unused]] std::string_view false_value) {
26 NotImplemented();
27}
28
29void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
30 std::string_view true_value, std::string_view false_value) {
31 ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value);
32}
33
34void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
35 std::string_view true_value, std::string_view false_value) {
36 ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value);
37}
38
39void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
40 [[maybe_unused]] std::string_view true_value,
41 [[maybe_unused]] std::string_view false_value) {
42 NotImplemented();
43}
44
45void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
46 std::string_view true_value, std::string_view false_value) {
47 ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value);
48}
49
50void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
51 std::string_view true_value, std::string_view false_value) {
52 ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value);
53}
54
55} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
new file mode 100644
index 000000000..518b78f06
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -0,0 +1,79 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
14 "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
15 "if(cas_result==old_value){{break;}}}}"};
16
17void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value,
18 std::string_view bit_offset, u32 num_bits) {
19 const auto smem{fmt::format("smem[{}>>2]", offset)};
20 ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits);
21}
22} // Anonymous namespace
23
24void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
25 ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
26}
27
28void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
29 ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
30}
31
32void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
33 ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
34}
35
36void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
37 ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
38}
39
40void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
41 ctx.AddU32("{}=smem[{}>>2];", inst, offset);
42}
43
44void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
45 ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
46}
47
48void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
49 ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
50 offset, offset, offset, offset);
51}
52
53void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) {
54 const auto bit_offset{fmt::format("int({}%4)*8", offset)};
55 SharedWriteCas(ctx, offset, value, bit_offset, 8);
56}
57
58void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) {
59 const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)};
60 SharedWriteCas(ctx, offset, value, bit_offset, 16);
61}
62
63void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) {
64 ctx.Add("smem[{}>>2]={};", offset, value);
65}
66
67void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) {
68 ctx.Add("smem[{}>>2]={}.x;", offset, value);
69 ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
70}
71
72void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) {
73 ctx.Add("smem[{}>>2]={}.x;", offset, value);
74 ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
75 ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
76 ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
77}
78
79} // namespace Shader::Backend::GLSL
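
The cas_loop string above carries the whole trick for byte and halfword stores: shared memory is declared as an array of uints, so the helper keeps re-reading the containing word, splices the new bits in with bitfieldInsert, and retries atomicCompSwap until no other invocation raced the update. A minimal C++ sketch that expands the same format string for an 8-bit write, assuming the hypothetical operand names u_3 (offset) and u_4 (value):

#include <string>

#include <fmt/format.h>

int main() {
    constexpr const char cas_loop[] =
        "for(;;){{uint old_value={};uint "
        "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
        "if(cas_result==old_value){{break;}}}}";
    const std::string smem{fmt::format("smem[{}>>2]", "u_3")};
    // Same argument order as SharedWriteCas: destination word (three times), value,
    // bit offset, bit count.
    fmt::print("{}\n", fmt::format(fmt::runtime(cas_loop), smem, smem, smem, "u_4",
                                   "int(u_3%4)*8", 8));
    return 0;
}

The printed line is the GLSL statement the backend would append for an EmitWriteSharedU8 call with those operands.
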
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
new file mode 100644
index 000000000..9b866f889
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -0,0 +1,111 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/profile.h"
12
13namespace Shader::Backend::GLSL {
14namespace {
15std::string_view OutputVertexIndex(EmitContext& ctx) {
16 return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
17}
18
19void InitializeOutputVaryings(EmitContext& ctx) {
20 if (ctx.uses_geometry_passthrough) {
21 return;
22 }
23 if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
24 ctx.Add("gl_Position=vec4(0,0,0,1);");
25 }
26 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
27 if (!ctx.info.stores.Generic(index)) {
28 continue;
29 }
30 const auto& info_array{ctx.output_generics.at(index)};
31 const auto output_decorator{OutputVertexIndex(ctx)};
32 size_t element{};
33 while (element < info_array.size()) {
34 const auto& info{info_array.at(element)};
35 const auto varying_name{fmt::format("{}{}", info.name, output_decorator)};
36 switch (info.num_components) {
37 case 1: {
38 const char value{element == 3 ? '1' : '0'};
39 ctx.Add("{}={}.f;", varying_name, value);
40 break;
41 }
42 case 2:
43 case 3:
44 if (element + info.num_components < 4) {
45 ctx.Add("{}=vec{}(0);", varying_name, info.num_components);
46 } else {
47 // last element is the w component, must be initialized to 1
48 const auto zeros{info.num_components == 3 ? "0,0," : "0,"};
49 ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros);
50 }
51 break;
52 case 4:
53 ctx.Add("{}=vec4(0,0,0,1);", varying_name);
54 break;
55 default:
56 break;
57 }
58 element += info.num_components;
59 }
60 }
61}
62} // Anonymous namespace
63
64void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
65 const size_t num_args{phi.NumArgs()};
66 for (size_t i = 0; i < num_args; ++i) {
67 ctx.var_alloc.Consume(phi.Arg(i));
68 }
69 if (!phi.Definition<Id>().is_valid) {
70 // The phi node wasn't forward defined
71 ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type());
72 }
73}
74
75void EmitVoid(EmitContext&) {}
76
77void EmitReference(EmitContext& ctx, const IR::Value& value) {
78 ctx.var_alloc.Consume(value);
79}
80
81void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
82 IR::Inst& phi{*phi_value.InstRecursive()};
83 const auto phi_type{phi.Arg(0).Type()};
84 if (!phi.Definition<Id>().is_valid) {
85 // The phi node wasn't forward defined
86 ctx.var_alloc.PhiDefine(phi, phi_type);
87 }
88 const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})};
89 const auto val_reg{ctx.var_alloc.Consume(value)};
90 if (phi_reg == val_reg) {
91 return;
92 }
93 ctx.Add("{}={};", phi_reg, val_reg);
94}
95
96void EmitPrologue(EmitContext& ctx) {
97 InitializeOutputVaryings(ctx);
98}
99
100void EmitEpilogue(EmitContext&) {}
101
102void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
103 ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
104 InitializeOutputVaryings(ctx);
105}
106
107void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
108 ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream));
109}
110
111} // namespace Shader::Backend::GLSL
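
GLSL has no SSA phi instruction, so EmitPhi only reserves a variable and EmitPhiMove lowers each incoming value to a plain assignment: every predecessor block writes the phi's variable before control reaches the join point, and the move is skipped when source and destination already resolved to the same name. A tiny C++ analogue of that lowering (the variable name and branch condition are purely illustrative):

#include <cstdio>

int main(int argc, char**) {
    int phi_var;         // plays the role of the pre-declared phi variable
    if (argc > 1) {
        phi_var = 1;     // the "EmitPhiMove" of the taken predecessor
    } else {
        phi_var = 2;     // the "EmitPhiMove" of the other predecessor
    }
    std::printf("%d\n", phi_var); // the merge block only ever reads phi_var
    return 0;
}
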
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
new file mode 100644
index 000000000..15bf02dd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9
10namespace Shader::Backend::GLSL {
11
12void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
13 ctx.AddU1("{}=false;", inst);
14}
15
16void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
17 ctx.AddU32("{}=0u;", inst);
18}
19
20void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
21 ctx.AddU32("{}=0u;", inst);
22}
23
24void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
25 ctx.AddU32("{}=0u;", inst);
26}
27
28void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
29 ctx.AddU64("{}=0u;", inst);
30}
31
32} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
new file mode 100644
index 000000000..a982dd8a2
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -0,0 +1,217 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
14void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
15 IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
16 if (!in_bounds) {
17 return;
18 }
19 ctx.AddU1("{}=shfl_in_bounds;", *in_bounds);
20 in_bounds->Invalidate();
21}
22
23std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) {
24 return fmt::format("({}&{})", thread_id, segmentation_mask);
25}
26
27std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp,
28 std::string_view not_seg_mask) {
29 return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask);
30}
31
32std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp,
33 std::string_view segmentation_mask) {
34 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
35 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
36 return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask);
37}
38
39void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
40 std::string_view value, std::string_view index,
41 [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) {
42 const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)};
43 ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
44 SetInBoundsFlag(ctx, inst);
45}
46} // Anonymous namespace
47
48void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
49 ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
50}
51
52void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
53 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
54 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
55 } else {
56 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
57 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
58 ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
59 }
60}
61
62void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
63 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
64 ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
65 } else {
66 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
67 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
68 ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
69 }
70}
71
72void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
73 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
74 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
75 } else {
76 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
77 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
78 const auto value{fmt::format("({}^{})", ballot, active_mask)};
79 ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
80 }
81}
82
83void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
84 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
85 ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
86 } else {
87 ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
88 }
89}
90
91void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
92 ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
93}
94
95void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
96 ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
97}
98
99void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
100 ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
101}
102
103void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
104 ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
105}
106
107void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
108 ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
109}
110
111void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
112 std::string_view index, std::string_view clamp,
113 std::string_view segmentation_mask) {
114 if (ctx.profile.support_gl_warp_intrinsics) {
115 UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
116 return;
117 }
118 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
119 const auto thread_id{"gl_SubGroupInvocationARB"};
120 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
121 const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
122
123 const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
124 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
125 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
126 SetInBoundsFlag(ctx, inst);
127 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
128}
129
130void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
131 std::string_view clamp, std::string_view segmentation_mask) {
132 if (ctx.profile.support_gl_warp_intrinsics) {
133 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
134 return;
135 }
136 const auto thread_id{"gl_SubGroupInvocationARB"};
137 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
138 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
139 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
140 SetInBoundsFlag(ctx, inst);
141 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
142}
143
144void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
145 std::string_view index, std::string_view clamp,
146 std::string_view segmentation_mask) {
147 if (ctx.profile.support_gl_warp_intrinsics) {
148 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
149 return;
150 }
151 const auto thread_id{"gl_SubGroupInvocationARB"};
152 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
153 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
154 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
155 SetInBoundsFlag(ctx, inst);
156 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
157}
158
159void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
160 std::string_view index, std::string_view clamp,
161 std::string_view segmentation_mask) {
162 if (ctx.profile.support_gl_warp_intrinsics) {
163 UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
164 return;
165 }
166 const auto thread_id{"gl_SubGroupInvocationARB"};
167 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
168 const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
169 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
170 SetInBoundsFlag(ctx, inst);
171 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
172}
173
174void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
175 std::string_view swizzle) {
176 const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)};
177 const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask);
178 const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask);
179 ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b);
180}
181
182void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
183 if (ctx.profile.support_gl_derivative_control) {
184 ctx.AddF32("{}=dFdxFine({});", inst, op_a);
185 } else {
186 LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx");
187 ctx.AddF32("{}=dFdx({});", inst, op_a);
188 }
189}
190
191void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
192 if (ctx.profile.support_gl_derivative_control) {
193 ctx.AddF32("{}=dFdyFine({});", inst, op_a);
194 } else {
195 LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy");
196 ctx.AddF32("{}=dFdy({});", inst, op_a);
197 }
198}
199
200void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
201 if (ctx.profile.support_gl_derivative_control) {
202 ctx.AddF32("{}=dFdxCoarse({});", inst, op_a);
203 } else {
204 LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx");
205 ctx.AddF32("{}=dFdx({});", inst, op_a);
206 }
207}
208
209void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
210 if (ctx.profile.support_gl_derivative_control) {
211 ctx.AddF32("{}=dFdyCoarse({});", inst, op_a);
212 } else {
213 LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy");
214 ctx.AddF32("{}=dFdy({});", inst, op_a);
215 }
216}
217} // namespace Shader::Backend::GLSL
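
When the NV shuffle intrinsics are unavailable, the emitters above derive the source lane by hand: the segmentation mask pins the bits that identify the invocation's segment, the clamp supplies the in-segment upper bound, and shfl_in_bounds selects between the shuffled and the original value. A stand-alone C++ sketch of the EmitShuffleIndex arithmetic, evaluated for arbitrary example values (lane 13, index 5, clamp 31, mask 0x18):

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t thread_id = 13;   // gl_SubGroupInvocationARB
    const std::uint32_t index = 5;        // requested source lane
    const std::uint32_t clamp = 31;       // in-segment upper bound
    const std::uint32_t seg_mask = 0x18;  // segmentation mask
    const std::uint32_t not_seg_mask = ~seg_mask;
    const std::uint32_t min_tid = thread_id & seg_mask;              // 8
    const std::uint32_t max_tid = min_tid | (clamp & not_seg_mask);  // 15
    const std::uint32_t src_tid = (index & not_seg_mask) | min_tid;  // 13
    const bool in_bounds = static_cast<int>(src_tid) <= static_cast<int>(max_tid);
    std::printf("src=%u max=%u in_bounds=%d\n", src_tid, max_tid, in_bounds);
    return 0;
}
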
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp
new file mode 100644
index 000000000..194f926ca
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp
@@ -0,0 +1,308 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <string_view>
7
8#include <fmt/format.h>
9
10#include "shader_recompiler/backend/glsl/var_alloc.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::Backend::GLSL {
15namespace {
16std::string TypePrefix(GlslVarType type) {
17 switch (type) {
18 case GlslVarType::U1:
19 return "b_";
20 case GlslVarType::F16x2:
21 return "f16x2_";
22 case GlslVarType::U32:
23 return "u_";
24 case GlslVarType::F32:
25 return "f_";
26 case GlslVarType::U64:
27 return "u64_";
28 case GlslVarType::F64:
29 return "d_";
30 case GlslVarType::U32x2:
31 return "u2_";
32 case GlslVarType::F32x2:
33 return "f2_";
34 case GlslVarType::U32x3:
35 return "u3_";
36 case GlslVarType::F32x3:
37 return "f3_";
38 case GlslVarType::U32x4:
39 return "u4_";
40 case GlslVarType::F32x4:
41 return "f4_";
42 case GlslVarType::PrecF32:
43 return "pf_";
44 case GlslVarType::PrecF64:
45 return "pd_";
46 case GlslVarType::Void:
47 return "";
48 default:
49 throw NotImplementedException("Type {}", type);
50 }
51}
52
53std::string FormatFloat(std::string_view value, IR::Type type) {
54 // TODO: Confirm FP64 nan/inf
55 if (type == IR::Type::F32) {
56 if (value == "nan") {
57 return "utof(0x7fc00000)";
58 }
59 if (value == "inf") {
60 return "utof(0x7f800000)";
61 }
62 if (value == "-inf") {
63 return "utof(0xff800000)";
64 }
65 }
66 if (value.find_first_of('e') != std::string_view::npos) {
67 // scientific notation
68 const auto cast{type == IR::Type::F32 ? "float" : "double"};
69 return fmt::format("{}({})", cast, value);
70 }
71 const bool needs_dot{value.find_first_of('.') == std::string_view::npos};
72 const bool needs_suffix{!value.ends_with('f')};
73 const auto suffix{type == IR::Type::F32 ? "f" : "lf"};
74 return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : "");
75}
76
77std::string MakeImm(const IR::Value& value) {
78 switch (value.Type()) {
79 case IR::Type::U1:
80 return fmt::format("{}", value.U1() ? "true" : "false");
81 case IR::Type::U32:
82 return fmt::format("{}u", value.U32());
83 case IR::Type::F32:
84 return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32);
85 case IR::Type::U64:
86 return fmt::format("{}ul", value.U64());
87 case IR::Type::F64:
88 return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64);
89 case IR::Type::Void:
90 return "";
91 default:
92 throw NotImplementedException("Immediate type {}", value.Type());
93 }
94}
95} // Anonymous namespace
96
97std::string VarAlloc::Representation(u32 index, GlslVarType type) const {
98 const auto prefix{TypePrefix(type)};
99 return fmt::format("{}{}", prefix, index);
100}
101
102std::string VarAlloc::Representation(Id id) const {
103 return Representation(id.index, id.type);
104}
105
106std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) {
107 if (inst.HasUses()) {
108 inst.SetDefinition<Id>(Alloc(type));
109 return Representation(inst.Definition<Id>());
110 } else {
111 Id id{};
112 id.type.Assign(type);
113 GetUseTracker(type).uses_temp = true;
114 inst.SetDefinition<Id>(id);
115 return 't' + Representation(inst.Definition<Id>());
116 }
117}
118
119std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) {
120 return Define(inst, RegType(type));
121}
122
123std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) {
124 return AddDefine(inst, RegType(type));
125}
126
127std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) {
128 if (inst.HasUses()) {
129 inst.SetDefinition<Id>(Alloc(type));
130 } else {
131 // Unreferenced results do not need a variable
132 return "";
133 }
134 return Representation(inst.Definition<Id>());
135}
136
137std::string VarAlloc::Consume(const IR::Value& value) {
138 return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
139}
140
141std::string VarAlloc::ConsumeInst(IR::Inst& inst) {
142 inst.DestructiveRemoveUsage();
143 if (!inst.HasUses()) {
144 Free(inst.Definition<Id>());
145 }
146 return Representation(inst.Definition<Id>());
147}
148
149std::string VarAlloc::GetGlslType(IR::Type type) const {
150 return GetGlslType(RegType(type));
151}
152
153Id VarAlloc::Alloc(GlslVarType type) {
154 auto& use_tracker{GetUseTracker(type)};
155 const auto num_vars{use_tracker.var_use.size()};
156 for (size_t var = 0; var < num_vars; ++var) {
157 if (use_tracker.var_use[var]) {
158 continue;
159 }
160 use_tracker.num_used = std::max(use_tracker.num_used, var + 1);
161 use_tracker.var_use[var] = true;
162 Id ret{};
163 ret.is_valid.Assign(1);
164 ret.type.Assign(type);
165 ret.index.Assign(static_cast<u32>(var));
166 return ret;
167 }
168 // Allocate a new variable
169 use_tracker.var_use.push_back(true);
170 Id ret{};
171 ret.is_valid.Assign(1);
172 ret.type.Assign(type);
173 ret.index.Assign(static_cast<u32>(use_tracker.num_used));
174 ++use_tracker.num_used;
175 return ret;
176}
177
178void VarAlloc::Free(Id id) {
179 if (id.is_valid == 0) {
180 throw LogicError("Freeing invalid variable");
181 }
182 auto& use_tracker{GetUseTracker(id.type)};
183 use_tracker.var_use[id.index] = false;
184}
185
186GlslVarType VarAlloc::RegType(IR::Type type) const {
187 switch (type) {
188 case IR::Type::U1:
189 return GlslVarType::U1;
190 case IR::Type::U32:
191 return GlslVarType::U32;
192 case IR::Type::F32:
193 return GlslVarType::F32;
194 case IR::Type::U64:
195 return GlslVarType::U64;
196 case IR::Type::F64:
197 return GlslVarType::F64;
198 default:
199 throw NotImplementedException("IR type {}", type);
200 }
201}
202
203std::string VarAlloc::GetGlslType(GlslVarType type) const {
204 switch (type) {
205 case GlslVarType::U1:
206 return "bool";
207 case GlslVarType::F16x2:
208 return "f16vec2";
209 case GlslVarType::U32:
210 return "uint";
211 case GlslVarType::F32:
212 case GlslVarType::PrecF32:
213 return "float";
214 case GlslVarType::U64:
215 return "uint64_t";
216 case GlslVarType::F64:
217 case GlslVarType::PrecF64:
218 return "double";
219 case GlslVarType::U32x2:
220 return "uvec2";
221 case GlslVarType::F32x2:
222 return "vec2";
223 case GlslVarType::U32x3:
224 return "uvec3";
225 case GlslVarType::F32x3:
226 return "vec3";
227 case GlslVarType::U32x4:
228 return "uvec4";
229 case GlslVarType::F32x4:
230 return "vec4";
231 case GlslVarType::Void:
232 return "";
233 default:
234 throw NotImplementedException("Type {}", type);
235 }
236}
237
238VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) {
239 switch (type) {
240 case GlslVarType::U1:
241 return var_bool;
242 case GlslVarType::F16x2:
243 return var_f16x2;
244 case GlslVarType::U32:
245 return var_u32;
246 case GlslVarType::F32:
247 return var_f32;
248 case GlslVarType::U64:
249 return var_u64;
250 case GlslVarType::F64:
251 return var_f64;
252 case GlslVarType::U32x2:
253 return var_u32x2;
254 case GlslVarType::F32x2:
255 return var_f32x2;
256 case GlslVarType::U32x3:
257 return var_u32x3;
258 case GlslVarType::F32x3:
259 return var_f32x3;
260 case GlslVarType::U32x4:
261 return var_u32x4;
262 case GlslVarType::F32x4:
263 return var_f32x4;
264 case GlslVarType::PrecF32:
265 return var_precf32;
266 case GlslVarType::PrecF64:
267 return var_precf64;
268 default:
269 throw NotImplementedException("Type {}", type);
270 }
271}
272
273const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const {
274 switch (type) {
275 case GlslVarType::U1:
276 return var_bool;
277 case GlslVarType::F16x2:
278 return var_f16x2;
279 case GlslVarType::U32:
280 return var_u32;
281 case GlslVarType::F32:
282 return var_f32;
283 case GlslVarType::U64:
284 return var_u64;
285 case GlslVarType::F64:
286 return var_f64;
287 case GlslVarType::U32x2:
288 return var_u32x2;
289 case GlslVarType::F32x2:
290 return var_f32x2;
291 case GlslVarType::U32x3:
292 return var_u32x3;
293 case GlslVarType::F32x3:
294 return var_f32x3;
295 case GlslVarType::U32x4:
296 return var_u32x4;
297 case GlslVarType::F32x4:
298 return var_f32x4;
299 case GlslVarType::PrecF32:
300 return var_precf32;
301 case GlslVarType::PrecF64:
302 return var_precf64;
303 default:
304 throw NotImplementedException("Type {}", type);
305 }
306}
307
308} // namespace Shader::Backend::GLSL
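
The allocator keeps one pool per GLSL type and names each live slot with the pool's type prefix plus its index, so the emitters can splice those names directly into format strings; ConsumeInst releases a slot once the last IR use is gone so later definitions can reuse it. A minimal C++ sketch in the spirit of Alloc/Free and Representation (not the actual implementation; slot numbers are arbitrary examples):

#include <cstdio>
#include <string>
#include <vector>

struct Pool {
    std::string prefix;
    std::vector<bool> in_use;

    std::size_t Alloc() {
        // Reuse the first free slot, otherwise grow the pool
        for (std::size_t i = 0; i < in_use.size(); ++i) {
            if (!in_use[i]) {
                in_use[i] = true;
                return i;
            }
        }
        in_use.push_back(true);
        return in_use.size() - 1;
    }
    void Free(std::size_t index) {
        in_use[index] = false;
    }
    std::string Name(std::size_t index) const {
        return prefix + std::to_string(index);
    }
};

int main() {
    Pool u32_pool{"u_", {}};
    const std::size_t a = u32_pool.Alloc(); // "u_0"
    const std::size_t b = u32_pool.Alloc(); // "u_1"
    u32_pool.Free(a);                       // slot 0 becomes reusable
    const std::size_t c = u32_pool.Alloc(); // reuses slot 0, so the name "u_0" comes back
    std::printf("%s %s %s\n", u32_pool.Name(a).c_str(), u32_pool.Name(b).c_str(),
                u32_pool.Name(c).c_str());
    return 0;
}
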
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h
new file mode 100644
index 000000000..8b49f32a6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.h
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <string>
9#include <vector>
10
11#include "common/bit_field.h"
12#include "common/common_types.h"
13
14namespace Shader::IR {
15class Inst;
16class Value;
17enum class Type;
18} // namespace Shader::IR
19
20namespace Shader::Backend::GLSL {
21enum class GlslVarType : u32 {
22 U1,
23 F16x2,
24 U32,
25 F32,
26 U64,
27 F64,
28 U32x2,
29 F32x2,
30 U32x3,
31 F32x3,
32 U32x4,
33 F32x4,
34 PrecF32,
35 PrecF64,
36 Void,
37};
38
39struct Id {
40 union {
41 u32 raw;
42 BitField<0, 1, u32> is_valid;
43 BitField<1, 4, GlslVarType> type;
44 BitField<6, 26, u32> index;
45 };
46
47 bool operator==(Id rhs) const noexcept {
48 return raw == rhs.raw;
49 }
50 bool operator!=(Id rhs) const noexcept {
51 return !operator==(rhs);
52 }
53};
54static_assert(sizeof(Id) == sizeof(u32));
55
56class VarAlloc {
57public:
58 struct UseTracker {
59 bool uses_temp{};
60 size_t num_used{};
61 std::vector<bool> var_use;
62 };
63
64 /// Used for explicit usages of variables, may revert to temporaries
65 std::string Define(IR::Inst& inst, GlslVarType type);
66 std::string Define(IR::Inst& inst, IR::Type type);
67
68 /// Used to assign variables used by the IR. May return a blank string if
69 /// the instruction's result is unused in the IR.
70 std::string AddDefine(IR::Inst& inst, GlslVarType type);
71 std::string PhiDefine(IR::Inst& inst, IR::Type type);
72
73 std::string Consume(const IR::Value& value);
74 std::string ConsumeInst(IR::Inst& inst);
75
76 std::string GetGlslType(GlslVarType type) const;
77 std::string GetGlslType(IR::Type type) const;
78
79 const UseTracker& GetUseTracker(GlslVarType type) const;
80 std::string Representation(u32 index, GlslVarType type) const;
81
82private:
83 GlslVarType RegType(IR::Type type) const;
84 Id Alloc(GlslVarType type);
85 void Free(Id id);
86 UseTracker& GetUseTracker(GlslVarType type);
87 std::string Representation(Id id) const;
88
89 UseTracker var_bool{};
90 UseTracker var_f16x2{};
91 UseTracker var_u32{};
92 UseTracker var_u32x2{};
93 UseTracker var_u32x3{};
94 UseTracker var_u32x4{};
95 UseTracker var_f32{};
96 UseTracker var_f32x2{};
97 UseTracker var_f32x3{};
98 UseTracker var_f32x4{};
99 UseTracker var_u64{};
100 UseTracker var_f64{};
101 UseTracker var_precf32{};
102 UseTracker var_precf64{};
103};
104
105} // namespace Shader::Backend::GLSL
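
Id packs everything needed to reference a value into one 32-bit word through BitField: bit 0 flags validity, bits 1-4 carry the GlslVarType, and bits 6-31 hold the slot index (bit 5 is left unused). A small C++ sketch of the equivalent packing with explicit shifts, using arbitrary example values:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t is_valid = 1; // bit 0
    const std::uint32_t type = 2;     // bits 1-4, e.g. GlslVarType::U32 in the enum above
    const std::uint32_t index = 7;    // bits 6-31
    const std::uint32_t raw = (is_valid & 0x1u) | ((type & 0xFu) << 1) | (index << 6);
    std::printf("raw=0x%08x type=%u index=%u\n", raw, (raw >> 1) & 0xFu, raw >> 6);
    return 0;
}
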
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
new file mode 100644
index 000000000..2d29d8c14
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -0,0 +1,1368 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <climits>
8#include <string_view>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "common/div_ceil.h"
14#include "shader_recompiler/backend/spirv/emit_context.h"
15
16namespace Shader::Backend::SPIRV {
17namespace {
18enum class Operation {
19 Increment,
20 Decrement,
21 FPAdd,
22 FPMin,
23 FPMax,
24};
25
26struct AttrInfo {
27 Id pointer;
28 Id id;
29 bool needs_cast;
30};
31
32Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
33 const spv::ImageFormat format{spv::ImageFormat::Unknown};
34 const Id type{ctx.F32[1]};
35 const bool depth{desc.is_depth};
36 switch (desc.type) {
37 case TextureType::Color1D:
38 return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
39 case TextureType::ColorArray1D:
40 return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
41 case TextureType::Color2D:
42 return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format);
43 case TextureType::ColorArray2D:
44 return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format);
45 case TextureType::Color3D:
46 return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
47 case TextureType::ColorCube:
48 return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
49 case TextureType::ColorArrayCube:
50 return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
51 case TextureType::Buffer:
52 break;
53 }
54 throw InvalidArgument("Invalid texture type {}", desc.type);
55}
56
57spv::ImageFormat GetImageFormat(ImageFormat format) {
58 switch (format) {
59 case ImageFormat::Typeless:
60 return spv::ImageFormat::Unknown;
61 case ImageFormat::R8_UINT:
62 return spv::ImageFormat::R8ui;
63 case ImageFormat::R8_SINT:
64 return spv::ImageFormat::R8i;
65 case ImageFormat::R16_UINT:
66 return spv::ImageFormat::R16ui;
67 case ImageFormat::R16_SINT:
68 return spv::ImageFormat::R16i;
69 case ImageFormat::R32_UINT:
70 return spv::ImageFormat::R32ui;
71 case ImageFormat::R32G32_UINT:
72 return spv::ImageFormat::Rg32ui;
73 case ImageFormat::R32G32B32A32_UINT:
74 return spv::ImageFormat::Rgba32ui;
75 }
76 throw InvalidArgument("Invalid image format {}", format);
77}
78
79Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
80 const spv::ImageFormat format{GetImageFormat(desc.format)};
81 const Id type{ctx.U32[1]};
82 switch (desc.type) {
83 case TextureType::Color1D:
84 return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format);
85 case TextureType::ColorArray1D:
86 return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format);
87 case TextureType::Color2D:
88 return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format);
89 case TextureType::ColorArray2D:
90 return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format);
91 case TextureType::Color3D:
92 return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format);
93 case TextureType::Buffer:
94 throw NotImplementedException("Image buffer");
95 default:
96 break;
97 }
98 throw InvalidArgument("Invalid texture type {}", desc.type);
99}
100
101Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin,
102 spv::StorageClass storage_class) {
103 const Id pointer_type{ctx.TypePointer(storage_class, type)};
104 const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)};
105 if (builtin) {
106 ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin);
107 }
108 ctx.interfaces.push_back(id);
109 return id;
110}
111
112u32 NumVertices(InputTopology input_topology) {
113 switch (input_topology) {
114 case InputTopology::Points:
115 return 1;
116 case InputTopology::Lines:
117 return 2;
118 case InputTopology::LinesAdjacency:
119 return 4;
120 case InputTopology::Triangles:
121 return 3;
122 case InputTopology::TrianglesAdjacency:
123 return 6;
124 }
125 throw InvalidArgument("Invalid input topology {}", input_topology);
126}
127
128Id DefineInput(EmitContext& ctx, Id type, bool per_invocation,
129 std::optional<spv::BuiltIn> builtin = std::nullopt) {
130 switch (ctx.stage) {
131 case Stage::TessellationControl:
132 case Stage::TessellationEval:
133 if (per_invocation) {
134 type = ctx.TypeArray(type, ctx.Const(32u));
135 }
136 break;
137 case Stage::Geometry:
138 if (per_invocation) {
139 const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)};
140 type = ctx.TypeArray(type, ctx.Const(num_vertices));
141 }
142 break;
143 default:
144 break;
145 }
146 return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);
147}
148
149Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations,
150 std::optional<spv::BuiltIn> builtin = std::nullopt) {
151 if (invocations && ctx.stage == Stage::TessellationControl) {
152 type = ctx.TypeArray(type, ctx.Const(*invocations));
153 }
154 return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
155}
156
157void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) {
158 static constexpr std::string_view swizzle{"xyzw"};
159 const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
160 u32 element{0};
161 while (element < 4) {
162 const u32 remainder{4 - element};
163 const TransformFeedbackVarying* xfb_varying{};
164 if (!ctx.runtime_info.xfb_varyings.empty()) {
165 xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element];
166 xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
167 }
168 const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
169
170 const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)};
171 ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
172 if (element > 0) {
173 ctx.Decorate(id, spv::Decoration::Component, element);
174 }
175 if (xfb_varying) {
176 ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer);
177 ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride);
178 ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
179 }
180 if (num_components < 4 || element > 0) {
181 const std::string_view subswizzle{swizzle.substr(element, num_components)};
182 ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle));
183 } else {
184 ctx.Name(id, fmt::format("out_attr{}", index));
185 }
186 const GenericElementInfo info{
187 .id = id,
188 .first_element = element,
189 .num_components = num_components,
190 };
191 std::fill_n(ctx.output_generics[index].begin() + element, num_components, info);
192 element += num_components;
193 }
194}
195
196Id GetAttributeType(EmitContext& ctx, AttributeType type) {
197 switch (type) {
198 case AttributeType::Float:
199 return ctx.F32[4];
200 case AttributeType::SignedInt:
201 return ctx.TypeVector(ctx.TypeInt(32, true), 4);
202 case AttributeType::UnsignedInt:
203 return ctx.U32[4];
204 case AttributeType::Disabled:
205 break;
206 }
207 throw InvalidArgument("Invalid attribute type {}", type);
208}
209
210std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
211 const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
212 switch (type) {
213 case AttributeType::Float:
214 return AttrInfo{ctx.input_f32, ctx.F32[1], false};
215 case AttributeType::UnsignedInt:
216 return AttrInfo{ctx.input_u32, ctx.U32[1], true};
217 case AttributeType::SignedInt:
218 return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
219 case AttributeType::Disabled:
220 return std::nullopt;
221 }
222 throw InvalidArgument("Invalid attribute type {}", type);
223}
224
225std::string_view StageName(Stage stage) {
226 switch (stage) {
227 case Stage::VertexA:
228 return "vs_a";
229 case Stage::VertexB:
230 return "vs";
231 case Stage::TessellationControl:
232 return "tcs";
233 case Stage::TessellationEval:
234 return "tes";
235 case Stage::Geometry:
236 return "gs";
237 case Stage::Fragment:
238 return "fs";
239 case Stage::Compute:
240 return "cs";
241 }
242 throw InvalidArgument("Invalid stage {}", stage);
243}
244
245template <typename... Args>
246void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
247 ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
248 std::forward<Args>(args)...)
249 .c_str());
250}
251
252void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type,
253 u32 binding, Id type, char type_char, u32 element_size) {
254 const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))};
255 ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size);
256
257 const Id struct_type{ctx.TypeStruct(array_type)};
258 Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT);
259 ctx.Decorate(struct_type, spv::Decoration::Block);
260 ctx.MemberName(struct_type, 0, "data");
261 ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
262
263 const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)};
264 const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)};
265 ctx.uniform_types.*member_type = uniform_type;
266
267 for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
268 const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)};
269 ctx.Decorate(id, spv::Decoration::Binding, binding);
270 ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
271 ctx.Name(id, fmt::format("c{}", desc.index));
272 for (size_t i = 0; i < desc.count; ++i) {
273 ctx.cbufs[desc.index + i].*member_type = id;
274 }
275 if (ctx.profile.supported_spirv >= 0x00010400) {
276 ctx.interfaces.push_back(id);
277 }
278 binding += desc.count;
279 }
280}
281
282void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
283 Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type,
284 u32 stride) {
285 const Id array_type{ctx.TypeRuntimeArray(type)};
286 ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride);
287
288 const Id struct_type{ctx.TypeStruct(array_type)};
289 ctx.Decorate(struct_type, spv::Decoration::Block);
290 ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
291
292 const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
293 type_def.array = struct_pointer;
294 type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type);
295
296 u32 index{};
297 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
298 const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)};
299 ctx.Decorate(id, spv::Decoration::Binding, binding);
300 ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
301 ctx.Name(id, fmt::format("ssbo{}", index));
302 if (ctx.profile.supported_spirv >= 0x00010400) {
303 ctx.interfaces.push_back(id);
304 }
305 for (size_t i = 0; i < desc.count; ++i) {
306 ctx.ssbos[index + i].*member_type = id;
307 }
308 index += desc.count;
309 binding += desc.count;
310 }
311}
312
313Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) {
314 const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)};
315 const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
316 const Id op_a{ctx.OpFunctionParameter(value_type)};
317 const Id op_b{ctx.OpFunctionParameter(value_type)};
318 ctx.AddLabel();
319 Id result{};
320 switch (operation) {
321 case Operation::Increment: {
322 const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)};
323 const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))};
324 result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr);
325 break;
326 }
327 case Operation::Decrement: {
328 const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))};
329 const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)};
330 const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)};
331 const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))};
332 result = ctx.OpSelect(value_type, pred, op_b, decr);
333 break;
334 }
335 case Operation::FPAdd:
336 result = ctx.OpFAdd(value_type, op_a, op_b);
337 break;
338 case Operation::FPMin:
339 result = ctx.OpFMin(value_type, op_a, op_b);
340 break;
341 case Operation::FPMax:
342 result = ctx.OpFMax(value_type, op_a, op_b);
343 break;
344 default:
345 break;
346 }
347 ctx.OpReturnValue(result);
348 ctx.OpFunctionEnd();
349 return func;
350}
351
352Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer,
353 Id value_type, Id memory_type, spv::Scope scope) {
354 const bool is_shared{scope == spv::Scope::Workgroup};
355 const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout};
356 const Id cas_func{CasFunction(ctx, operation, value_type)};
357 const Id zero{ctx.u32_zero_value};
358 const Id scope_id{ctx.Const(static_cast<u32>(scope))};
359
360 const Id loop_header{ctx.OpLabel()};
361 const Id continue_block{ctx.OpLabel()};
362 const Id merge_block{ctx.OpLabel()};
363 const Id func_type{is_shared
364 ? ctx.TypeFunction(value_type, ctx.U32[1], value_type)
365 : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)};
366
367 const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
368 const Id index{ctx.OpFunctionParameter(ctx.U32[1])};
369 const Id op_b{ctx.OpFunctionParameter(value_type)};
370 const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)};
371 ctx.AddLabel();
372 ctx.OpBranch(loop_header);
373 ctx.AddLabel(loop_header);
374
375 ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
376 ctx.OpBranch(continue_block);
377
378 ctx.AddLabel(continue_block);
379 const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index)
380 : ctx.OpAccessChain(element_pointer, base, index)};
381 if (value_type.value == ctx.F32[2].value) {
382 const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)};
383 const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)};
384 const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)};
385 const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)};
386 const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
387 zero, u32_new_value, u32_value)};
388 const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)};
389 ctx.OpBranchConditional(success, merge_block, loop_header);
390
391 ctx.AddLabel(merge_block);
392 ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res));
393 } else {
394 const Id value{ctx.OpLoad(memory_type, word_pointer)};
395 const bool matching_type{value_type.value == memory_type.value};
396 const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)};
397 const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)};
398 const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)};
399 const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
400 zero, new_value, value)};
401 const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)};
402 ctx.OpBranchConditional(success, merge_block, loop_header);
403
404 ctx.AddLabel(merge_block);
405 ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res));
406 }
407 ctx.OpFunctionEnd();
408 return func;
409}
410
411template <typename Desc>
412std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) {
413 if (desc.count > 1) {
414 return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index,
415 desc.cbuf_offset, desc.count);
416 } else {
417 return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index,
418 desc.cbuf_offset);
419 }
420}
421
422Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
423 if (count > 1) {
424 const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))};
425 return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type);
426 } else {
427 return pointer_type;
428 }
429}
430} // Anonymous namespace
431
432void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
433 defs[0] = sirit_ctx.Name(base_type, name);
434
435 std::array<char, 6> def_name;
436 for (int i = 1; i < 4; ++i) {
437 const std::string_view def_name_view(
438 def_name.data(),
439 fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
440 defs[static_cast<size_t>(i)] =
441 sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
442 }
443}
444
445EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
446 IR::Program& program, Bindings& bindings)
447 : Sirit::Module(profile_.supported_spirv), profile{profile_},
448 runtime_info{runtime_info_}, stage{program.stage} {
449 const bool is_unified{profile.unified_descriptor_binding};
450 u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
451 u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
452 u32& texture_binding{is_unified ? bindings.unified : bindings.texture};
453 u32& image_binding{is_unified ? bindings.unified : bindings.image};
454 AddCapability(spv::Capability::Shader);
455 DefineCommonTypes(program.info);
456 DefineCommonConstants();
457 DefineInterfaces(program);
458 DefineLocalMemory(program);
459 DefineSharedMemory(program);
460 DefineSharedMemoryFunctions(program);
461 DefineConstantBuffers(program.info, uniform_binding);
462 DefineStorageBuffers(program.info, storage_binding);
463 DefineTextureBuffers(program.info, texture_binding);
464 DefineImageBuffers(program.info, image_binding);
465 DefineTextures(program.info, texture_binding);
466 DefineImages(program.info, image_binding);
467 DefineAttributeMemAccess(program.info);
468 DefineGlobalMemoryFunctions(program.info);
469}
470
471EmitContext::~EmitContext() = default;
472
473Id EmitContext::Def(const IR::Value& value) {
474 if (!value.IsImmediate()) {
475 return value.InstRecursive()->Definition<Id>();
476 }
477 switch (value.Type()) {
478 case IR::Type::Void:
479 // Void instructions are used for optional arguments (e.g. texture offsets)
480 // They are not meant to be used in the SPIR-V module
481 return Id{};
482 case IR::Type::U1:
483 return value.U1() ? true_value : false_value;
484 case IR::Type::U32:
485 return Const(value.U32());
486 case IR::Type::U64:
487 return Constant(U64, value.U64());
488 case IR::Type::F32:
489 return Const(value.F32());
490 case IR::Type::F64:
491 return Constant(F64[1], value.F64());
492 default:
493 throw NotImplementedException("Immediate type {}", value.Type());
494 }
495}
496
497Id EmitContext::BitOffset8(const IR::Value& offset) {
498 if (offset.IsImmediate()) {
499 return Const((offset.U32() % 4) * 8);
500 }
501 return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u));
502}
503
504Id EmitContext::BitOffset16(const IR::Value& offset) {
505 if (offset.IsImmediate()) {
506 return Const(((offset.U32() / 2) % 2) * 16);
507 }
508 return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u));
509}
510
511void EmitContext::DefineCommonTypes(const Info& info) {
512 void_id = TypeVoid();
513
514 U1 = Name(TypeBool(), "u1");
515
516 F32.Define(*this, TypeFloat(32), "f32");
517 U32.Define(*this, TypeInt(32, false), "u32");
518 S32.Define(*this, TypeInt(32, true), "s32");
519
520 private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
521
522 input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
523 input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
524 input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
525
526 output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
527 output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
528
529 if (info.uses_int8 && profile.support_int8) {
530 AddCapability(spv::Capability::Int8);
531 U8 = Name(TypeInt(8, false), "u8");
532 S8 = Name(TypeInt(8, true), "s8");
533 }
534 if (info.uses_int16 && profile.support_int16) {
535 AddCapability(spv::Capability::Int16);
536 U16 = Name(TypeInt(16, false), "u16");
537 S16 = Name(TypeInt(16, true), "s16");
538 }
539 if (info.uses_int64) {
540 AddCapability(spv::Capability::Int64);
541 U64 = Name(TypeInt(64, false), "u64");
542 }
543 if (info.uses_fp16) {
544 AddCapability(spv::Capability::Float16);
545 F16.Define(*this, TypeFloat(16), "f16");
546 }
547 if (info.uses_fp64) {
548 AddCapability(spv::Capability::Float64);
549 F64.Define(*this, TypeFloat(64), "f64");
550 }
551}
552
553void EmitContext::DefineCommonConstants() {
554 true_value = ConstantTrue(U1);
555 false_value = ConstantFalse(U1);
556 u32_zero_value = Const(0U);
557 f32_zero_value = Const(0.0f);
558}
559
560void EmitContext::DefineInterfaces(const IR::Program& program) {
561 DefineInputs(program);
562 DefineOutputs(program);
563}
564
565void EmitContext::DefineLocalMemory(const IR::Program& program) {
566 if (program.local_memory_size == 0) {
567 return;
568 }
569 const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
570 const Id type{TypeArray(U32[1], Const(num_elements))};
571 const Id pointer{TypePointer(spv::StorageClass::Private, type)};
572 local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
573 if (profile.supported_spirv >= 0x00010400) {
574 interfaces.push_back(local_memory);
575 }
576}
577
578void EmitContext::DefineSharedMemory(const IR::Program& program) {
579 if (program.shared_memory_size == 0) {
580 return;
581 }
582 const auto make{[&](Id element_type, u32 element_size) {
583 const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
584 const Id array_type{TypeArray(element_type, Const(num_elements))};
585 Decorate(array_type, spv::Decoration::ArrayStride, element_size);
586
587 const Id struct_type{TypeStruct(array_type)};
588 MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
589 Decorate(struct_type, spv::Decoration::Block);
590
591 const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
592 const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
593 const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
594 Decorate(variable, spv::Decoration::Aliased);
595 interfaces.push_back(variable);
596
597 return std::make_tuple(variable, element_pointer, pointer);
598 }};
599 if (profile.support_explicit_workgroup_layout) {
600 AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
601 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
602 if (program.info.uses_int8) {
603 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
604 std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
605 }
606 if (program.info.uses_int16) {
607 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
608 std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
609 }
610 if (program.info.uses_int64) {
611 std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8);
612 }
613 std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
614 std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
615 std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
616 return;
617 }
618 const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
619 const Id type{TypeArray(U32[1], Const(num_elements))};
620 shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
621
622 shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
623 shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
624 interfaces.push_back(shared_memory_u32);
625
626 const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
627 const auto make_function{[&](u32 mask, u32 size) {
628 const Id loop_header{OpLabel()};
629 const Id continue_block{OpLabel()};
630 const Id merge_block{OpLabel()};
631
632 const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
633 const Id offset{OpFunctionParameter(U32[1])};
634 const Id insert_value{OpFunctionParameter(U32[1])};
635 AddLabel();
636 OpBranch(loop_header);
637
638 AddLabel(loop_header);
639 const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
640 const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
641 const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
642 const Id count{Const(size)};
643 OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
644 OpBranch(continue_block);
645
646 AddLabel(continue_block);
647 const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
648 const Id old_value{OpLoad(U32[1], word_pointer)};
649 const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
650 const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value,
651 u32_zero_value, new_value, old_value)};
652 const Id success{OpIEqual(U1, atomic_res, old_value)};
653 OpBranchConditional(success, merge_block, loop_header);
654
655 AddLabel(merge_block);
656 OpReturn();
657 OpFunctionEnd();
658 return func;
659 }};
660 if (program.info.uses_int8) {
661 shared_store_u8_func = make_function(24, 8);
662 }
663 if (program.info.uses_int16) {
664 shared_store_u16_func = make_function(16, 16);
665 }
666}
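// When explicit workgroup layout is unavailable, byte and halfword stores are emulated with the
// CAS loop emitted by make_function above. A rough C-like sketch of the generated function
// (illustrative only; smem stands for the u32-typed shared memory array):
//   void shared_store_uN(uint offset, uint value) {
//       uint word = offset >> 2;
//       uint bit  = (offset << 3) & mask;                       // mask: 24 for u8, 16 for u16
//       uint old, inserted;
//       do {
//           old      = smem[word];
//           inserted = BitFieldInsert(old, value, bit, size);   // size: 8 or 16
//       } while (AtomicCompareExchange(&smem[word], old, inserted) != old);
//   }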
667
668void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
669 if (program.info.uses_shared_increment) {
670 increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type,
671 shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
672 }
673 if (program.info.uses_shared_decrement) {
674 decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type,
675 shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
676 }
677}
678
679void EmitContext::DefineAttributeMemAccess(const Info& info) {
680 const auto make_load{[&] {
681 const bool is_array{stage == Stage::Geometry};
682 const Id end_block{OpLabel()};
683 const Id default_label{OpLabel()};
684
685 const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1])
686 : TypeFunction(F32[1], U32[1])};
687 const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
688 const Id offset{OpFunctionParameter(U32[1])};
689 const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}};
690
691 AddLabel();
692 const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
693 const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
694 const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
695 std::vector<Sirit::Literal> literals;
696 std::vector<Id> labels;
697 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
698 literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
699 labels.push_back(OpLabel());
700 }
701 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
702 for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
703 if (!info.loads.Generic(index)) {
704 continue;
705 }
706 literals.push_back(base_attribute_value + index);
707 labels.push_back(OpLabel());
708 }
709 OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
710 OpSwitch(compare_index, default_label, literals, labels);
711 AddLabel(default_label);
712 OpReturnValue(Const(0.0f));
713 size_t label_index{0};
714 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
715 AddLabel(labels[label_index]);
716 const Id pointer{is_array
717 ? OpAccessChain(input_f32, input_position, vertex, masked_index)
718 : OpAccessChain(input_f32, input_position, masked_index)};
719 const Id result{OpLoad(F32[1], pointer)};
720 OpReturnValue(result);
721 ++label_index;
722 }
723 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
724 if (!info.loads.Generic(index)) {
725 continue;
726 }
727 AddLabel(labels[label_index]);
728 const auto type{AttrTypes(*this, static_cast<u32>(index))};
729 if (!type) {
730 OpReturnValue(Const(0.0f));
731 ++label_index;
732 continue;
733 }
734 const Id generic_id{input_generics.at(index)};
735 const Id pointer{is_array
736 ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
737 : OpAccessChain(type->pointer, generic_id, masked_index)};
738 const Id value{OpLoad(type->id, pointer)};
739 const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
740 OpReturnValue(result);
741 ++label_index;
742 }
743 AddLabel(end_block);
744 OpUnreachable();
745 OpFunctionEnd();
746 return func;
747 }};
748 const auto make_store{[&] {
749 const Id end_block{OpLabel()};
750 const Id default_label{OpLabel()};
751
752 const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])};
753 const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)};
754 const Id offset{OpFunctionParameter(U32[1])};
755 const Id store_value{OpFunctionParameter(F32[1])};
756 AddLabel();
757 const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
758 const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
759 const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
760 std::vector<Sirit::Literal> literals;
761 std::vector<Id> labels;
762 if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
763 literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
764 labels.push_back(OpLabel());
765 }
766 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
767 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
768 if (!info.stores.Generic(index)) {
769 continue;
770 }
771 literals.push_back(base_attribute_value + static_cast<u32>(index));
772 labels.push_back(OpLabel());
773 }
774 if (info.stores.ClipDistances()) {
775 literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
776 labels.push_back(OpLabel());
777 literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
778 labels.push_back(OpLabel());
779 }
780 OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
781 OpSwitch(compare_index, default_label, literals, labels);
782 AddLabel(default_label);
783 OpReturn();
784 size_t label_index{0};
785 if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
786 AddLabel(labels[label_index]);
787 const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
788 OpStore(pointer, store_value);
789 OpReturn();
790 ++label_index;
791 }
792 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
793 if (!info.stores.Generic(index)) {
794 continue;
795 }
796 if (output_generics[index][0].num_components != 4) {
797 throw NotImplementedException("Physical stores and transform feedbacks");
798 }
799 AddLabel(labels[label_index]);
800 const Id generic_id{output_generics[index][0].id};
801 const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
802 OpStore(pointer, store_value);
803 OpReturn();
804 ++label_index;
805 }
806 if (info.stores.ClipDistances()) {
807 AddLabel(labels[label_index]);
808 const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)};
809 OpStore(pointer, store_value);
810 OpReturn();
811 ++label_index;
812 AddLabel(labels[label_index]);
813 const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))};
814 const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)};
815 OpStore(pointer2, store_value);
816 OpReturn();
817 ++label_index;
818 }
819 AddLabel(end_block);
820 OpUnreachable();
821 OpFunctionEnd();
822 return func;
823 }};
824 if (info.loads_indexed_attributes) {
825 indexed_load_func = make_load();
826 }
827 if (info.stores_indexed_attributes) {
828 indexed_store_func = make_store();
829 }
830}
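// Both generated helpers decode an indexed attribute access the same way: (offset >> 2) is the
// 32-bit element index, (element & 3) the component within a vec4 attribute, and (element >> 2)
// the attribute selector that OpSwitch dispatches on. For example, the third component of generic
// attribute 1 yields masked_index == 2 and falls into the Generic1 case label.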
831
832void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
833 if (!info.uses_global_memory || !profile.support_int64) {
834 return;
835 }
836 using DefPtr = Id StorageDefinitions::*;
837 const Id zero{u32_zero_value};
838 const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
839 auto&& callback) {
840 AddLabel();
841 const size_t num_buffers{info.storage_buffers_descriptors.size()};
842 for (size_t index = 0; index < num_buffers; ++index) {
843 if (!info.nvn_buffer_used[index]) {
844 continue;
845 }
846 const auto& ssbo{info.storage_buffers_descriptors[index]};
847 const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
848 const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
849 const Id ssbo_addr_pointer{OpAccessChain(
850 uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)};
851 const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
852 zero, ssbo_size_cbuf_offset)};
853
854 const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
855 const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
856 const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
857 const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
858 OpULessThan(U1, addr, ssbo_end))};
859 const Id then_label{OpLabel()};
860 const Id else_label{OpLabel()};
861 OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
862 OpBranchConditional(cond, then_label, else_label);
863 AddLabel(then_label);
864 const Id ssbo_id{ssbos[index].*ssbo_member};
865 const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
866 const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
867 const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
868 callback(ssbo_pointer);
869 AddLabel(else_label);
870 }
871 }};
872 const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
873 const Id function_type{TypeFunction(type, U64)};
874 const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
875 const Id addr{OpFunctionParameter(U64)};
876 define_body(ssbo_member, addr, element_pointer, shift,
877 [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
878 OpReturnValue(ConstantNull(type));
879 OpFunctionEnd();
880 return func_id;
881 }};
882 const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
883 const Id function_type{TypeFunction(void_id, U64, type)};
884 const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
885 const Id addr{OpFunctionParameter(U64)};
886 const Id data{OpFunctionParameter(type)};
887 define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
888 OpStore(ssbo_pointer, data);
889 OpReturn();
890 });
891 OpReturn();
892 OpFunctionEnd();
893 return func_id;
894 }};
895 const auto define{
896 [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
897 const Id element_type{type_def.element};
898 const u32 shift{static_cast<u32>(std::countr_zero(size))};
899 const Id load_func{define_load(ssbo_member, element_type, type, shift)};
900 const Id write_func{define_write(ssbo_member, element_type, type, shift)};
901 return std::make_pair(load_func, write_func);
902 }};
903 std::tie(load_global_func_u32, write_global_func_u32) =
904 define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
905 std::tie(load_global_func_u32x2, write_global_func_u32x2) =
906 define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
907 std::tie(load_global_func_u32x4, write_global_func_u32x4) =
908 define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
909}
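// Each generated global memory function linearly probes the bound storage buffers. A rough sketch
// of the emitted logic (illustrative; ssbo_addr and ssbo_size are read from the constant buffer
// slots described by each StorageBufferDescriptor):
//   T load_global(uint64_t addr) {
//       for (each used ssbo) {
//           if (addr >= ssbo_addr && addr < ssbo_addr + ssbo_size) {
//               return ssbo[(addr - ssbo_addr) >> shift];       // shift = log2(element size)
//           }
//       }
//       return T{};                                             // ConstantNull fallback
//   }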
910
911void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
912 if (info.constant_buffer_descriptors.empty()) {
913 return;
914 }
915 if (!profile.support_descriptor_aliasing) {
916 DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u',
917 sizeof(u32[4]));
918 for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
919 binding += desc.count;
920 }
921 return;
922 }
923 IR::Type types{info.used_constant_buffer_types};
924 if (True(types & IR::Type::U8)) {
925 if (profile.support_int8) {
926 DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
927 DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8));
928 } else {
929 types |= IR::Type::U32;
930 }
931 }
932 if (True(types & IR::Type::U16)) {
933 if (profile.support_int16) {
934 DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u',
935 sizeof(u16));
936 DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's',
937 sizeof(s16));
938 } else {
939 types |= IR::Type::U32;
940 }
941 }
942 if (True(types & IR::Type::U32)) {
943 DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u',
944 sizeof(u32));
945 }
946 if (True(types & IR::Type::F32)) {
947 DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f',
948 sizeof(f32));
949 }
950 if (True(types & IR::Type::U32x2)) {
951 DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u',
952 sizeof(u32[2]));
953 }
954 binding += static_cast<u32>(info.constant_buffer_descriptors.size());
955}
956
957void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
958 if (info.storage_buffers_descriptors.empty()) {
959 return;
960 }
961 AddExtension("SPV_KHR_storage_buffer_storage_class");
962
963 const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types
964 : IR::Type::U32};
965 if (profile.support_int8 && True(used_types & IR::Type::U8)) {
966 DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8,
967 sizeof(u8));
968 DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8,
969 sizeof(u8));
970 }
971 if (profile.support_int16 && True(used_types & IR::Type::U16)) {
972 DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16,
973 sizeof(u16));
974 DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16,
975 sizeof(u16));
976 }
977 if (True(used_types & IR::Type::U32)) {
978 DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1],
979 sizeof(u32));
980 }
981 if (True(used_types & IR::Type::F32)) {
982 DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1],
983 sizeof(f32));
984 }
985 if (True(used_types & IR::Type::U64)) {
986 DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64,
987 sizeof(u64));
988 }
989 if (True(used_types & IR::Type::U32x2)) {
990 DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2],
991 sizeof(u32[2]));
992 }
993 if (True(used_types & IR::Type::U32x4)) {
994 DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4],
995 sizeof(u32[4]));
996 }
997 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
998 binding += desc.count;
999 }
1000 const bool needs_function{
1001 info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add ||
1002 info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max ||
1003 info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max};
1004 if (needs_function) {
1005 AddCapability(spv::Capability::VariablePointersStorageBuffer);
1006 }
1007 if (info.uses_global_increment) {
1008 increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array,
1009 storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
1010 }
1011 if (info.uses_global_decrement) {
1012 decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array,
1013 storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
1014 }
1015 if (info.uses_atomic_f32_add) {
1016 f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
1017 storage_types.U32.element, F32[1], U32[1], spv::Scope::Device);
1018 }
1019 if (info.uses_atomic_f16x2_add) {
1020 f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
1021 storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
1022 }
1023 if (info.uses_atomic_f16x2_min) {
1024 f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
1025 storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
1026 }
1027 if (info.uses_atomic_f16x2_max) {
1028 f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
1029 storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
1030 }
1031 if (info.uses_atomic_f32x2_add) {
1032 f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
1033 storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
1034 }
1035 if (info.uses_atomic_f32x2_min) {
1036 f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
1037 storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
1038 }
1039 if (info.uses_atomic_f32x2_max) {
1040 f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
1041 storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
1042 }
1043}
1044
1045void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
1046 if (info.texture_buffer_descriptors.empty()) {
1047 return;
1048 }
1049 const spv::ImageFormat format{spv::ImageFormat::Unknown};
1050 image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
1051 sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
1052
1053 const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)};
1054 texture_buffers.reserve(info.texture_buffer_descriptors.size());
1055 for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
1056 if (desc.count != 1) {
1057 throw NotImplementedException("Array of texture buffers");
1058 }
1059 const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)};
1060 Decorate(id, spv::Decoration::Binding, binding);
1061 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1062 Name(id, NameOf(stage, desc, "texbuf"));
1063 texture_buffers.push_back({
1064 .id = id,
1065 .count = desc.count,
1066 });
1067 if (profile.supported_spirv >= 0x00010400) {
1068 interfaces.push_back(id);
1069 }
1070 ++binding;
1071 }
1072}
1073
1074void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
1075 image_buffers.reserve(info.image_buffer_descriptors.size());
1076 for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) {
1077 if (desc.count != 1) {
1078 throw NotImplementedException("Array of image buffers");
1079 }
1080 const spv::ImageFormat format{GetImageFormat(desc.format)};
1081 const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
1082 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
1083 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
1084 Decorate(id, spv::Decoration::Binding, binding);
1085 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1086 Name(id, NameOf(stage, desc, "imgbuf"));
1087 image_buffers.push_back({
1088 .id = id,
1089 .image_type = image_type,
1090 .count = desc.count,
1091 });
1092 if (profile.supported_spirv >= 0x00010400) {
1093 interfaces.push_back(id);
1094 }
1095 ++binding;
1096 }
1097}
1098
1099void EmitContext::DefineTextures(const Info& info, u32& binding) {
1100 textures.reserve(info.texture_descriptors.size());
1101 for (const TextureDescriptor& desc : info.texture_descriptors) {
1102 const Id image_type{ImageType(*this, desc)};
1103 const Id sampled_type{TypeSampledImage(image_type)};
1104 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
1105 const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
1106 const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)};
1107 Decorate(id, spv::Decoration::Binding, binding);
1108 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1109 Name(id, NameOf(stage, desc, "tex"));
1110 textures.push_back({
1111 .id = id,
1112 .sampled_type = sampled_type,
1113 .pointer_type = pointer_type,
1114 .image_type = image_type,
1115 .count = desc.count,
1116 });
1117 if (profile.supported_spirv >= 0x00010400) {
1118 interfaces.push_back(id);
1119 }
1120 ++binding;
1121 }
1122 if (info.uses_atomic_image_u32) {
1123 image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
1124 }
1125}
1126
1127void EmitContext::DefineImages(const Info& info, u32& binding) {
1128 images.reserve(info.image_descriptors.size());
1129 for (const ImageDescriptor& desc : info.image_descriptors) {
1130 if (desc.count != 1) {
1131 throw NotImplementedException("Array of images");
1132 }
1133 const Id image_type{ImageType(*this, desc)};
1134 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
1135 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
1136 Decorate(id, spv::Decoration::Binding, binding);
1137 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1138 Name(id, NameOf(stage, desc, "img"));
1139 images.push_back({
1140 .id = id,
1141 .image_type = image_type,
1142 .count = desc.count,
1143 });
1144 if (profile.supported_spirv >= 0x00010400) {
1145 interfaces.push_back(id);
1146 }
1147 ++binding;
1148 }
1149}
1150
1151void EmitContext::DefineInputs(const IR::Program& program) {
1152 const Info& info{program.info};
1153 const VaryingState loads{info.loads.mask | info.passthrough.mask};
1154
1155 if (info.uses_workgroup_id) {
1156 workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
1157 }
1158 if (info.uses_local_invocation_id) {
1159 local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId);
1160 }
1161 if (info.uses_invocation_id) {
1162 invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId);
1163 }
1164 if (info.uses_sample_id) {
1165 sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
1166 }
1167 if (info.uses_is_helper_invocation) {
1168 is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
1169 }
1170 if (info.uses_subgroup_mask) {
1171 subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
1172 subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
1173 subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
1174 subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
1175 subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
1176 }
1177 if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
1178 (profile.warp_size_potentially_larger_than_guest &&
1179 (info.uses_subgroup_vote || info.uses_subgroup_mask))) {
1180 subgroup_local_invocation_id =
1181 DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
1182 }
1183 if (info.uses_fswzadd) {
1184 const Id f32_one{Const(1.0f)};
1185 const Id f32_minus_one{Const(-1.0f)};
1186 const Id f32_zero{Const(0.0f)};
1187 fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
1188 fswzadd_lut_b =
1189 ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
1190 }
1191 if (loads[IR::Attribute::PrimitiveId]) {
1192 primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
1193 }
1194 if (loads.AnyComponent(IR::Attribute::PositionX)) {
1195 const bool is_fragment{stage == Stage::Fragment};
1196 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
1197 input_position = DefineInput(*this, F32[4], true, built_in);
1198 if (profile.support_geometry_shader_passthrough) {
1199 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
1200 Decorate(input_position, spv::Decoration::PassthroughNV);
1201 }
1202 }
1203 }
1204 if (loads[IR::Attribute::InstanceId]) {
1205 if (profile.support_vertex_instance_id) {
1206 instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
1207 } else {
1208 instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
1209 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
1210 }
1211 }
1212 if (loads[IR::Attribute::VertexId]) {
1213 if (profile.support_vertex_instance_id) {
1214 vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
1215 } else {
1216 vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
1217 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
1218 }
1219 }
1220 if (loads[IR::Attribute::FrontFace]) {
1221 front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
1222 }
1223 if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) {
1224 point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord);
1225 }
1226 if (loads[IR::Attribute::TessellationEvaluationPointU] ||
1227 loads[IR::Attribute::TessellationEvaluationPointV]) {
1228 tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
1229 }
1230 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
1231 const AttributeType input_type{runtime_info.generic_input_types[index]};
1232 if (!runtime_info.previous_stage_stores.Generic(index)) {
1233 continue;
1234 }
1235 if (!loads.Generic(index)) {
1236 continue;
1237 }
1238 if (input_type == AttributeType::Disabled) {
1239 continue;
1240 }
1241 const Id type{GetAttributeType(*this, input_type)};
1242 const Id id{DefineInput(*this, type, true)};
1243 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
1244 Name(id, fmt::format("in_attr{}", index));
1245 input_generics[index] = id;
1246
1247 if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) {
1248 Decorate(id, spv::Decoration::PassthroughNV);
1249 }
1250 if (stage != Stage::Fragment) {
1251 continue;
1252 }
1253 switch (info.interpolation[index]) {
1254 case Interpolation::Smooth:
1255 // Default
1256 // Decorate(id, spv::Decoration::Smooth);
1257 break;
1258 case Interpolation::NoPerspective:
1259 Decorate(id, spv::Decoration::NoPerspective);
1260 break;
1261 case Interpolation::Flat:
1262 Decorate(id, spv::Decoration::Flat);
1263 break;
1264 }
1265 }
1266 if (stage == Stage::TessellationEval) {
1267 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
1268 if (!info.uses_patches[index]) {
1269 continue;
1270 }
1271 const Id id{DefineInput(*this, F32[4], false)};
1272 Decorate(id, spv::Decoration::Patch);
1273 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
1274 patches[index] = id;
1275 }
1276 }
1277}
1278
1279void EmitContext::DefineOutputs(const IR::Program& program) {
1280 const Info& info{program.info};
1281 const std::optional<u32> invocations{program.invocations};
1282 if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) {
1283 output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position);
1284 }
1285 if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) {
1286 if (stage == Stage::Fragment) {
1287 throw NotImplementedException("Storing PointSize in fragment stage");
1288 }
1289 output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize);
1290 }
1291 if (info.stores.ClipDistances()) {
1292 if (stage == Stage::Fragment) {
1293 throw NotImplementedException("Storing ClipDistance in fragment stage");
1294 }
1295 const Id type{TypeArray(F32[1], Const(8U))};
1296 clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
1297 }
1298 if (info.stores[IR::Attribute::Layer] &&
1299 (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
1300 if (stage == Stage::Fragment) {
1301 throw NotImplementedException("Storing Layer in fragment stage");
1302 }
1303 layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer);
1304 }
1305 if (info.stores[IR::Attribute::ViewportIndex] &&
1306 (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
1307 if (stage == Stage::Fragment) {
1308 throw NotImplementedException("Storing ViewportIndex in fragment stage");
1309 }
1310 viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex);
1311 }
1312 if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
1313 viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
1314 spv::BuiltIn::ViewportMaskNV);
1315 }
1316 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
1317 if (info.stores.Generic(index)) {
1318 DefineGenericOutput(*this, index, invocations);
1319 }
1320 }
1321 switch (stage) {
1322 case Stage::TessellationControl:
1323 if (info.stores_tess_level_outer) {
1324 const Id type{TypeArray(F32[1], Const(4U))};
1325 output_tess_level_outer =
1326 DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter);
1327 Decorate(output_tess_level_outer, spv::Decoration::Patch);
1328 }
1329 if (info.stores_tess_level_inner) {
1330 const Id type{TypeArray(F32[1], Const(2U))};
1331 output_tess_level_inner =
1332 DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner);
1333 Decorate(output_tess_level_inner, spv::Decoration::Patch);
1334 }
1335 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
1336 if (!info.uses_patches[index]) {
1337 continue;
1338 }
1339 const Id id{DefineOutput(*this, F32[4], std::nullopt)};
1340 Decorate(id, spv::Decoration::Patch);
1341 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
1342 patches[index] = id;
1343 }
1344 break;
1345 case Stage::Fragment:
1346 for (u32 index = 0; index < 8; ++index) {
1347 if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
1348 continue;
1349 }
1350 frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
1351 Decorate(frag_color[index], spv::Decoration::Location, index);
1352 Name(frag_color[index], fmt::format("frag_color{}", index));
1353 }
1354 if (info.stores_frag_depth) {
1355 frag_depth = DefineOutput(*this, F32[1], std::nullopt);
1356 Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
1357 }
1358 if (info.stores_sample_mask) {
1359 sample_mask = DefineOutput(*this, U32[1], std::nullopt);
1360 Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
1361 }
1362 break;
1363 default:
1364 break;
1365 }
1366}
1367
1368} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
new file mode 100644
index 000000000..e277bc358
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -0,0 +1,307 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string_view>
9
10#include <sirit/sirit.h>
11
12#include "shader_recompiler/backend/bindings.h"
13#include "shader_recompiler/frontend/ir/program.h"
14#include "shader_recompiler/profile.h"
15#include "shader_recompiler/runtime_info.h"
16#include "shader_recompiler/shader_info.h"
17
18namespace Shader::Backend::SPIRV {
19
20using Sirit::Id;
21
22class VectorTypes {
23public:
24 void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name);
25
26 [[nodiscard]] Id operator[](size_t size) const noexcept {
27 return defs[size - 1];
28 }
29
30private:
31 std::array<Id, 4> defs{};
32};
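// Convention used throughout the backend: F32[1] is the scalar type id and F32[2..4] are the
// matching vector types, all registered by Define (for example U32[3] is the uvec3 type used for
// the workgroup id input).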
33
34struct TextureDefinition {
35 Id id;
36 Id sampled_type;
37 Id pointer_type;
38 Id image_type;
39 u32 count;
40};
41
42struct TextureBufferDefinition {
43 Id id;
44 u32 count;
45};
46
47struct ImageBufferDefinition {
48 Id id;
49 Id image_type;
50 u32 count;
51};
52
53struct ImageDefinition {
54 Id id;
55 Id image_type;
56 u32 count;
57};
58
59struct UniformDefinitions {
60 Id U8{};
61 Id S8{};
62 Id U16{};
63 Id S16{};
64 Id U32{};
65 Id F32{};
66 Id U32x2{};
67 Id U32x4{};
68};
69
70struct StorageTypeDefinition {
71 Id array{};
72 Id element{};
73};
74
75struct StorageTypeDefinitions {
76 StorageTypeDefinition U8{};
77 StorageTypeDefinition S8{};
78 StorageTypeDefinition U16{};
79 StorageTypeDefinition S16{};
80 StorageTypeDefinition U32{};
81 StorageTypeDefinition U64{};
82 StorageTypeDefinition F32{};
83 StorageTypeDefinition U32x2{};
84 StorageTypeDefinition U32x4{};
85};
86
87struct StorageDefinitions {
88 Id U8{};
89 Id S8{};
90 Id U16{};
91 Id S16{};
92 Id U32{};
93 Id F32{};
94 Id U64{};
95 Id U32x2{};
96 Id U32x4{};
97};
98
99struct GenericElementInfo {
100 Id id{};
101 u32 first_element{};
102 u32 num_components{};
103};
104
105class EmitContext final : public Sirit::Module {
106public:
107 explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info,
108 IR::Program& program, Bindings& binding);
109 ~EmitContext();
110
111 [[nodiscard]] Id Def(const IR::Value& value);
112
113 [[nodiscard]] Id BitOffset8(const IR::Value& offset);
114 [[nodiscard]] Id BitOffset16(const IR::Value& offset);
115
116 Id Const(u32 value) {
117 return Constant(U32[1], value);
118 }
119
120 Id Const(u32 element_1, u32 element_2) {
121 return ConstantComposite(U32[2], Const(element_1), Const(element_2));
122 }
123
124 Id Const(u32 element_1, u32 element_2, u32 element_3) {
125 return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3));
126 }
127
128 Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) {
129 return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3),
130 Const(element_4));
131 }
132
133 Id SConst(s32 value) {
134 return Constant(S32[1], value);
135 }
136
137 Id SConst(s32 element_1, s32 element_2) {
138 return ConstantComposite(S32[2], SConst(element_1), SConst(element_2));
139 }
140
141 Id SConst(s32 element_1, s32 element_2, s32 element_3) {
142 return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3));
143 }
144
145 Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) {
146 return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3),
147 SConst(element_4));
148 }
149
150 Id Const(f32 value) {
151 return Constant(F32[1], value);
152 }
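// Usage sketch (illustrative): the helpers above build typed SPIR-V constants, e.g.
//   const Id extent{ctx.Const(16u, 16u, 1u)};   // OpConstantComposite of u32x3
//   const Id bias{ctx.Const(0.5f)};             // OpConstant of f32
//   const Id offset{ctx.SConst(-1, 1)};         // OpConstantComposite of s32x2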
153
154 const Profile& profile;
155 const RuntimeInfo& runtime_info;
156 Stage stage{};
157
158 Id void_id{};
159 Id U1{};
160 Id U8{};
161 Id S8{};
162 Id U16{};
163 Id S16{};
164 Id U64{};
165 VectorTypes F32;
166 VectorTypes U32;
167 VectorTypes S32;
168 VectorTypes F16;
169 VectorTypes F64;
170
171 Id true_value{};
172 Id false_value{};
173 Id u32_zero_value{};
174 Id f32_zero_value{};
175
176 UniformDefinitions uniform_types;
177 StorageTypeDefinitions storage_types;
178
179 Id private_u32{};
180
181 Id shared_u8{};
182 Id shared_u16{};
183 Id shared_u32{};
184 Id shared_u64{};
185 Id shared_u32x2{};
186 Id shared_u32x4{};
187
188 Id input_f32{};
189 Id input_u32{};
190 Id input_s32{};
191
192 Id output_f32{};
193 Id output_u32{};
194
195 Id image_buffer_type{};
196 Id sampled_texture_buffer_type{};
197 Id image_u32{};
198
199 std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
200 std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{};
201 std::vector<TextureBufferDefinition> texture_buffers;
202 std::vector<ImageBufferDefinition> image_buffers;
203 std::vector<TextureDefinition> textures;
204 std::vector<ImageDefinition> images;
205
206 Id workgroup_id{};
207 Id local_invocation_id{};
208 Id invocation_id{};
209 Id sample_id{};
210 Id is_helper_invocation{};
211 Id subgroup_local_invocation_id{};
212 Id subgroup_mask_eq{};
213 Id subgroup_mask_lt{};
214 Id subgroup_mask_le{};
215 Id subgroup_mask_gt{};
216 Id subgroup_mask_ge{};
217 Id instance_id{};
218 Id instance_index{};
219 Id base_instance{};
220 Id vertex_id{};
221 Id vertex_index{};
222 Id base_vertex{};
223 Id front_face{};
224 Id point_coord{};
225 Id tess_coord{};
226 Id clip_distances{};
227 Id layer{};
228 Id viewport_index{};
229 Id viewport_mask{};
230 Id primitive_id{};
231
232 Id fswzadd_lut_a{};
233 Id fswzadd_lut_b{};
234
235 Id indexed_load_func{};
236 Id indexed_store_func{};
237
238 Id local_memory{};
239
240 Id shared_memory_u8{};
241 Id shared_memory_u16{};
242 Id shared_memory_u32{};
243 Id shared_memory_u64{};
244 Id shared_memory_u32x2{};
245 Id shared_memory_u32x4{};
246
247 Id shared_memory_u32_type{};
248
249 Id shared_store_u8_func{};
250 Id shared_store_u16_func{};
251 Id increment_cas_shared{};
252 Id increment_cas_ssbo{};
253 Id decrement_cas_shared{};
254 Id decrement_cas_ssbo{};
255 Id f32_add_cas{};
256 Id f16x2_add_cas{};
257 Id f16x2_min_cas{};
258 Id f16x2_max_cas{};
259 Id f32x2_add_cas{};
260 Id f32x2_min_cas{};
261 Id f32x2_max_cas{};
262
263 Id load_global_func_u32{};
264 Id load_global_func_u32x2{};
265 Id load_global_func_u32x4{};
266 Id write_global_func_u32{};
267 Id write_global_func_u32x2{};
268 Id write_global_func_u32x4{};
269
270 Id input_position{};
271 std::array<Id, 32> input_generics{};
272
273 Id output_point_size{};
274 Id output_position{};
275 std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
276
277 Id output_tess_level_outer{};
278 Id output_tess_level_inner{};
279 std::array<Id, 30> patches{};
280
281 std::array<Id, 8> frag_color{};
282 Id sample_mask{};
283 Id frag_depth{};
284
285 std::vector<Id> interfaces;
286
287private:
288 void DefineCommonTypes(const Info& info);
289 void DefineCommonConstants();
290 void DefineInterfaces(const IR::Program& program);
291 void DefineLocalMemory(const IR::Program& program);
292 void DefineSharedMemory(const IR::Program& program);
293 void DefineSharedMemoryFunctions(const IR::Program& program);
294 void DefineConstantBuffers(const Info& info, u32& binding);
295 void DefineStorageBuffers(const Info& info, u32& binding);
296 void DefineTextureBuffers(const Info& info, u32& binding);
297 void DefineImageBuffers(const Info& info, u32& binding);
298 void DefineTextures(const Info& info, u32& binding);
299 void DefineImages(const Info& info, u32& binding);
300 void DefineAttributeMemAccess(const Info& info);
301 void DefineGlobalMemoryFunctions(const Info& info);
302
303 void DefineInputs(const IR::Program& program);
304 void DefineOutputs(const IR::Program& program);
305};
306
307} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
new file mode 100644
index 000000000..d7a86e270
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -0,0 +1,541 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <span>
6#include <tuple>
7#include <type_traits>
8#include <utility>
9#include <vector>
10
11#include "common/settings.h"
12#include "shader_recompiler/backend/spirv/emit_spirv.h"
13#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
14#include "shader_recompiler/frontend/ir/basic_block.h"
15#include "shader_recompiler/frontend/ir/program.h"
16
17namespace Shader::Backend::SPIRV {
18namespace {
19template <class Func>
20struct FuncTraits {};
21
22template <class ReturnType_, class... Args>
23struct FuncTraits<ReturnType_ (*)(Args...)> {
24 using ReturnType = ReturnType_;
25
26 static constexpr size_t NUM_ARGS = sizeof...(Args);
27
28 template <size_t I>
29 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
30};
31
32template <auto func, typename... Args>
33void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
34 inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
35}
36
37template <typename ArgType>
38ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
39 if constexpr (std::is_same_v<ArgType, Id>) {
40 return ctx.Def(arg);
41 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
42 return arg;
43 } else if constexpr (std::is_same_v<ArgType, u32>) {
44 return arg.U32();
45 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
46 return arg.Attribute();
47 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
48 return arg.Patch();
49 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
50 return arg.Reg();
51 }
52}
53
54template <auto func, bool is_first_arg_inst, size_t... I>
55void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
56 using Traits = FuncTraits<decltype(func)>;
57 if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
58 if constexpr (is_first_arg_inst) {
59 SetDefinition<func>(
60 ctx, inst, inst,
61 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
62 } else {
63 SetDefinition<func>(
64 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
65 }
66 } else {
67 if constexpr (is_first_arg_inst) {
68 func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
69 } else {
70 func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
71 }
72 }
73}
74
75template <auto func>
76void Invoke(EmitContext& ctx, IR::Inst* inst) {
77 using Traits = FuncTraits<decltype(func)>;
78 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
79 if constexpr (Traits::NUM_ARGS == 1) {
80 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
81 } else {
82 using FirstArgType = typename Traits::template ArgType<1>;
83 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
84 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
85 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
86 }
87}
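// Dispatch inspects each Emit* signature at compile time. For a hypothetical emitter declared as
//   Id EmitIAdd32(EmitContext& ctx, Id a, Id b);
// FuncTraits sees two arguments after the context, Arg<Id> turns each IR operand into ctx.Def(...),
// and the returned id is stored on the instruction through SetDefinition. Emitters whose first
// argument after the context is IR::Inst* also receive the instruction itself.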
88
89void EmitInst(EmitContext& ctx, IR::Inst* inst) {
90 switch (inst->GetOpcode()) {
91#define OPCODE(name, result_type, ...) \
92 case IR::Opcode::name: \
93 return Invoke<&Emit##name>(ctx, inst);
94#include "shader_recompiler/frontend/ir/opcodes.inc"
95#undef OPCODE
96 }
97 throw LogicError("Invalid opcode {}", inst->GetOpcode());
98}
99
100Id TypeId(const EmitContext& ctx, IR::Type type) {
101 switch (type) {
102 case IR::Type::U1:
103 return ctx.U1;
104 case IR::Type::U32:
105 return ctx.U32[1];
106 default:
107 throw NotImplementedException("Phi node type {}", type);
108 }
109}
110
111void Traverse(EmitContext& ctx, IR::Program& program) {
112 IR::Block* current_block{};
113 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
114 switch (node.type) {
115 case IR::AbstractSyntaxNode::Type::Block: {
116 const Id label{node.data.block->Definition<Id>()};
117 if (current_block) {
118 ctx.OpBranch(label);
119 }
120 current_block = node.data.block;
121 ctx.AddLabel(label);
122 for (IR::Inst& inst : node.data.block->Instructions()) {
123 EmitInst(ctx, &inst);
124 }
125 break;
126 }
127 case IR::AbstractSyntaxNode::Type::If: {
128 const Id if_label{node.data.if_node.body->Definition<Id>()};
129 const Id endif_label{node.data.if_node.merge->Definition<Id>()};
130 ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
131 ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label);
132 break;
133 }
134 case IR::AbstractSyntaxNode::Type::Loop: {
135 const Id body_label{node.data.loop.body->Definition<Id>()};
136 const Id continue_label{node.data.loop.continue_block->Definition<Id>()};
137 const Id endloop_label{node.data.loop.merge->Definition<Id>()};
138
139 ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
140 ctx.OpBranch(body_label);
141 break;
142 }
143 case IR::AbstractSyntaxNode::Type::Break: {
144 const Id break_label{node.data.break_node.merge->Definition<Id>()};
145 const Id skip_label{node.data.break_node.skip->Definition<Id>()};
146 ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label);
147 break;
148 }
149 case IR::AbstractSyntaxNode::Type::EndIf:
150 if (current_block) {
151 ctx.OpBranch(node.data.end_if.merge->Definition<Id>());
152 }
153 break;
154 case IR::AbstractSyntaxNode::Type::Repeat: {
155 Id cond{ctx.Def(node.data.repeat.cond)};
156 if (!Settings::values.disable_shader_loop_safety_checks) {
157 const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])};
158 const Id safety_counter{ctx.AddGlobalVariable(
159 pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))};
160 if (ctx.profile.supported_spirv >= 0x00010400) {
161 ctx.interfaces.push_back(safety_counter);
162 }
163 const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)};
164 const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))};
165 ctx.OpStore(safety_counter, new_counter);
166
167 const Id safety_cond{
168 ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)};
169 cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond);
170 }
171 const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()};
172 const Id merge_label{node.data.repeat.merge->Definition<Id>()};
173 ctx.OpBranchConditional(cond, loop_header_label, merge_label);
174 break;
175 }
176 case IR::AbstractSyntaxNode::Type::Return:
177 ctx.OpReturn();
178 break;
179 case IR::AbstractSyntaxNode::Type::Unreachable:
180 ctx.OpUnreachable();
181 break;
182 }
183 if (node.type != IR::AbstractSyntaxNode::Type::Block) {
184 current_block = nullptr;
185 }
186 }
187}
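// When shader loop safety checks are enabled, every Repeat back-edge is guarded by a per-loop
// private counter initialized to 0x2000: each iteration subtracts one and the repeat condition is
// ANDed with a signed (counter >= 0) test, so a malformed loop is forcibly exited after roughly
// 0x2000 iterations.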
188
189Id DefineMain(EmitContext& ctx, IR::Program& program) {
190 const Id void_function{ctx.TypeFunction(ctx.void_id)};
191 const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
192 for (IR::Block* const block : program.blocks) {
193 block->SetDefinition(ctx.OpLabel());
194 }
195 Traverse(ctx, program);
196 ctx.OpFunctionEnd();
197 return main;
198}
199
200spv::ExecutionMode ExecutionMode(TessPrimitive primitive) {
201 switch (primitive) {
202 case TessPrimitive::Isolines:
203 return spv::ExecutionMode::Isolines;
204 case TessPrimitive::Triangles:
205 return spv::ExecutionMode::Triangles;
206 case TessPrimitive::Quads:
207 return spv::ExecutionMode::Quads;
208 }
209 throw InvalidArgument("Tessellation primitive {}", primitive);
210}
211
212spv::ExecutionMode ExecutionMode(TessSpacing spacing) {
213 switch (spacing) {
214 case TessSpacing::Equal:
215 return spv::ExecutionMode::SpacingEqual;
216 case TessSpacing::FractionalOdd:
217 return spv::ExecutionMode::SpacingFractionalOdd;
218 case TessSpacing::FractionalEven:
219 return spv::ExecutionMode::SpacingFractionalEven;
220 }
221 throw InvalidArgument("Tessellation spacing {}", spacing);
222}
223
224void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
225 const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
226 spv::ExecutionModel execution_model{};
227 switch (program.stage) {
228 case Stage::Compute: {
229 const std::array<u32, 3> workgroup_size{program.workgroup_size};
230 execution_model = spv::ExecutionModel::GLCompute;
231 ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
232 workgroup_size[1], workgroup_size[2]);
233 break;
234 }
235 case Stage::VertexB:
236 execution_model = spv::ExecutionModel::Vertex;
237 break;
238 case Stage::TessellationControl:
239 execution_model = spv::ExecutionModel::TessellationControl;
240 ctx.AddCapability(spv::Capability::Tessellation);
241 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations);
242 break;
243 case Stage::TessellationEval:
244 execution_model = spv::ExecutionModel::TessellationEvaluation;
245 ctx.AddCapability(spv::Capability::Tessellation);
246 ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive));
247 ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing));
248 ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise
249 ? spv::ExecutionMode::VertexOrderCw
250 : spv::ExecutionMode::VertexOrderCcw);
251 break;
252 case Stage::Geometry:
253 execution_model = spv::ExecutionModel::Geometry;
254 ctx.AddCapability(spv::Capability::Geometry);
255 ctx.AddCapability(spv::Capability::GeometryStreams);
256 switch (ctx.runtime_info.input_topology) {
257 case InputTopology::Points:
258 ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints);
259 break;
260 case InputTopology::Lines:
261 ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines);
262 break;
263 case InputTopology::LinesAdjacency:
264 ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency);
265 break;
266 case InputTopology::Triangles:
267 ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles);
268 break;
269 case InputTopology::TrianglesAdjacency:
270 ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency);
271 break;
272 }
273 switch (program.output_topology) {
274 case OutputTopology::PointList:
275 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints);
276 break;
277 case OutputTopology::LineStrip:
278 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip);
279 break;
280 case OutputTopology::TriangleStrip:
281 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip);
282 break;
283 }
284 if (program.info.stores[IR::Attribute::PointSize]) {
285 ctx.AddCapability(spv::Capability::GeometryPointSize);
286 }
287 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices);
288 ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations);
289 if (program.is_geometry_passthrough) {
290 if (ctx.profile.support_geometry_shader_passthrough) {
291 ctx.AddExtension("SPV_NV_geometry_shader_passthrough");
292 ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV);
293 } else {
294 LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used without host support");
295 }
296 }
297 break;
298 case Stage::Fragment:
299 execution_model = spv::ExecutionModel::Fragment;
300 if (ctx.profile.lower_left_origin_mode) {
301 ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
302 } else {
303 ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
304 }
305 if (program.info.stores_frag_depth) {
306 ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
307 }
308 if (ctx.runtime_info.force_early_z) {
309 ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
310 }
311 break;
312 default:
313 throw NotImplementedException("Stage {}", program.stage);
314 }
315 ctx.AddEntryPoint(execution_model, main, "main", interfaces);
316}
317
318void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
319 Id main_func) {
320 const Info& info{program.info};
321 if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
322 LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
323 } else if (info.uses_fp32_denorms_flush) {
324 if (profile.support_fp32_denorm_flush) {
325 ctx.AddCapability(spv::Capability::DenormFlushToZero);
326 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
327 } else {
328 // Drivers will most likely flush denorms by default, no need to warn
329 }
330 } else if (info.uses_fp32_denorms_preserve) {
331 if (profile.support_fp32_denorm_preserve) {
332 ctx.AddCapability(spv::Capability::DenormPreserve);
333 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
334 } else {
335 LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
336 }
337 }
338 if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
339 // No separate denorm behavior
340 return;
341 }
342 if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
343 LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
344 } else if (info.uses_fp16_denorms_flush) {
345 if (profile.support_fp16_denorm_flush) {
346 ctx.AddCapability(spv::Capability::DenormFlushToZero);
347 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
348 } else {
349 // Same as fp32, no need to warn as most drivers will flush by default
350 }
351 } else if (info.uses_fp16_denorms_preserve) {
352 if (profile.support_fp16_denorm_preserve) {
353 ctx.AddCapability(spv::Capability::DenormPreserve);
354 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
355 } else {
356 LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
357 }
358 }
359}
360
361void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
362 EmitContext& ctx, Id main_func) {
363 if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
364 return;
365 }
366 if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
367 ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
368 ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
369 }
370 if (profile.support_fp32_signed_zero_nan_preserve) {
371 ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
372 ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
373 }
374 if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
375 ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
376 ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U);
377 }
378}
379
380void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
381 if (info.uses_sampled_1d) {
382 ctx.AddCapability(spv::Capability::Sampled1D);
383 }
384 if (info.uses_sparse_residency) {
385 ctx.AddCapability(spv::Capability::SparseResidency);
386 }
387 if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) {
388 ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
389 ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
390 }
391 if (info.stores[IR::Attribute::ViewportIndex]) {
392 ctx.AddCapability(spv::Capability::MultiViewport);
393 }
394 if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
395 ctx.AddExtension("SPV_NV_viewport_array2");
396 ctx.AddCapability(spv::Capability::ShaderViewportMaskNV);
397 }
398 if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) {
399 if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
400 ctx.AddExtension("SPV_EXT_shader_viewport_index_layer");
401 ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
402 }
403 }
404 if (!profile.support_vertex_instance_id &&
405 (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) {
406 ctx.AddExtension("SPV_KHR_shader_draw_parameters");
407 ctx.AddCapability(spv::Capability::DrawParameters);
408 }
409 if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
410 info.uses_subgroup_shuffles) &&
411 profile.support_vote) {
412 ctx.AddExtension("SPV_KHR_shader_ballot");
413 ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
414 if (!profile.warp_size_potentially_larger_than_guest) {
415 // vote ops are only used when not taking the long path
416 ctx.AddExtension("SPV_KHR_subgroup_vote");
417 ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
418 }
419 }
420 if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
421 ctx.AddCapability(spv::Capability::Int64Atomics);
422 }
423 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
424 ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
425 }
426 if (info.uses_typeless_image_writes) {
427 ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
428 }
429 if (info.uses_image_buffers) {
430 ctx.AddCapability(spv::Capability::ImageBuffer);
431 }
432 if (info.uses_sample_id) {
433 ctx.AddCapability(spv::Capability::SampleRateShading);
434 }
435 if (!ctx.runtime_info.xfb_varyings.empty()) {
436 ctx.AddCapability(spv::Capability::TransformFeedback);
437 }
438 if (info.uses_derivatives) {
439 ctx.AddCapability(spv::Capability::DerivativeControl);
440 }
441 // TODO: Track this usage
442 ctx.AddCapability(spv::Capability::ImageGatherExtended);
443 ctx.AddCapability(spv::Capability::ImageQuery);
444 ctx.AddCapability(spv::Capability::SampledBuffer);
445}
446
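// Resolves the operands of OpPhi instructions that were emitted with deferred
// arguments. The callback walks the phi instructions in block order; an argument
// index of zero signals that it has advanced to the next phi, possibly in a later block.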
447void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
448 auto inst{program.blocks.front()->begin()};
449 size_t block_index{0};
450 ctx.PatchDeferredPhi([&](size_t phi_arg) {
451 if (phi_arg == 0) {
452 ++inst;
453 if (inst == program.blocks[block_index]->end() ||
454 inst->GetOpcode() != IR::Opcode::Phi) {
455 do {
456 ++block_index;
457 inst = program.blocks[block_index]->begin();
458 } while (inst->GetOpcode() != IR::Opcode::Phi);
459 }
460 }
461 return ctx.Def(inst->Arg(phi_arg));
462 });
463}
464} // Anonymous namespace
465
466std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
467 IR::Program& program, Bindings& bindings) {
468 EmitContext ctx{profile, runtime_info, program, bindings};
469 const Id main{DefineMain(ctx, program)};
470 DefineEntryPoint(program, ctx, main);
471 if (profile.support_float_controls) {
472 ctx.AddExtension("SPV_KHR_float_controls");
473 SetupDenormControl(profile, program, ctx, main);
474 SetupSignedNanCapabilities(profile, program, ctx, main);
475 }
476 SetupCapabilities(profile, program.info, ctx);
477 PatchPhiNodes(program, ctx);
478 return ctx.Assemble();
479}
480
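// Phi operands may reference values that have not been defined yet, so only the
// incoming block labels are emitted here; the value operands are filled in later by
// PatchPhiNodes once the whole module has been generated.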
481Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
482 const size_t num_args{inst->NumArgs()};
483 boost::container::small_vector<Id, 32> blocks;
484 blocks.reserve(num_args);
485 for (size_t index = 0; index < num_args; ++index) {
486 blocks.push_back(inst->PhiBlock(index)->Definition<Id>());
487 }
488 // The type of a phi instruction is stored in its flags
489 const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())};
490 return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size()));
491}
492
493void EmitVoid(EmitContext&) {}
494
495Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
496 const Id id{ctx.Def(value)};
497 if (!Sirit::ValidId(id)) {
498 throw NotImplementedException("Forward identity declaration");
499 }
500 return id;
501}
502
503Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
504 const Id id{ctx.Def(value)};
505 if (!Sirit::ValidId(id)) {
506 throw NotImplementedException("Forward identity declaration");
507 }
508 return id;
509}
510
511void EmitReference(EmitContext&) {}
512
513void EmitPhiMove(EmitContext&) {
514 throw LogicError("Unreachable instruction");
515}
516
517void EmitGetZeroFromOp(EmitContext&) {
518 throw LogicError("Unreachable instruction");
519}
520
521void EmitGetSignFromOp(EmitContext&) {
522 throw LogicError("Unreachable instruction");
523}
524
525void EmitGetCarryFromOp(EmitContext&) {
526 throw LogicError("Unreachable instruction");
527}
528
529void EmitGetOverflowFromOp(EmitContext&) {
530 throw LogicError("Unreachable instruction");
531}
532
533void EmitGetSparseFromOp(EmitContext&) {
534 throw LogicError("Unreachable instruction");
535}
536
537void EmitGetInBoundsFromOp(EmitContext&) {
538 throw LogicError("Unreachable instruction");
539}
540
541} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
new file mode 100644
index 000000000..db0c935fe
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -0,0 +1,27 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include <sirit/sirit.h>
10
11#include "common/common_types.h"
12#include "shader_recompiler/backend/bindings.h"
13#include "shader_recompiler/backend/spirv/emit_context.h"
14#include "shader_recompiler/frontend/ir/program.h"
15#include "shader_recompiler/profile.h"
16
17namespace Shader::Backend::SPIRV {
18
19[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
20 IR::Program& program, Bindings& bindings);
21
22[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
23 Bindings binding;
24 return EmitSPIRV(profile, {}, program, binding);
25}
26
27} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
new file mode 100644
index 000000000..9af8bb9e1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -0,0 +1,448 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
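// Shared memory is addressed as an array of u32 words, so byte offsets are converted
// to word indices with a right shift by two. With the explicit workgroup layout
// extension the array is wrapped in another composite, hence the extra leading zero
// index in the access chain.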
10Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
11 const Id shift_id{ctx.Const(2U)};
12 Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
13 if (index_offset > 0) {
14 index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
15 }
16 return ctx.profile.support_explicit_workgroup_layout
17 ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
18 : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
19}
20
21Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
22 if (offset.IsImmediate()) {
23 const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
24 return ctx.Const(imm_offset);
25 }
26 const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
27 const Id index{ctx.Def(offset)};
28 if (shift == 0) {
29 return index;
30 }
31 const Id shift_id{ctx.Const(shift)};
32 return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
33}
34
35Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def,
36 Id StorageDefinitions::*member_ptr, const IR::Value& binding,
37 const IR::Value& offset, size_t element_size) {
38 if (!binding.IsImmediate()) {
39 throw NotImplementedException("Dynamic storage buffer indexing");
40 }
41 const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
42 const Id index{StorageIndex(ctx, offset, element_size)};
43 return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
44}
45
46std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
47 const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
48 const Id semantics{ctx.u32_zero_value};
49 return {scope, semantics};
50}
51
52Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
53 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
54 const Id pointer{SharedPointer(ctx, offset)};
55 const auto [scope, semantics]{AtomicArgs(ctx)};
56 return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
57}
58
59Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
60 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
61 const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding,
62 offset, sizeof(u32))};
63 const auto [scope, semantics]{AtomicArgs(ctx)};
64 return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
65}
66
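// 64-bit storage atomics either map directly to the OpAtomic* instruction on a u64
// pointer or, when the host lacks Int64Atomics, degrade to a non-atomic
// read-modify-write through a uvec2 view of the same memory.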
67Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
68 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
69 Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
70 if (ctx.profile.support_int64_atomics) {
71 const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
72 binding, offset, sizeof(u64))};
73 const auto [scope, semantics]{AtomicArgs(ctx)};
74 return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
75 }
76 LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to non-atomic");
77 const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
78 binding, offset, sizeof(u32[2]))};
79 const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
80 const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)};
81 ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
82 return original_value;
83}
84} // Anonymous namespace
85
86Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
87 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
88}
89
90Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
91 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
92}
93
94Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
95 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
96}
97
98Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
99 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
100}
101
102Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
103 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
104}
105
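// Atomic increment/decrement with guest semantics has no direct SPIR-V equivalent,
// so these are lowered to helper functions defined by the emit context (the names
// suggest compare-and-swap loops).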
106Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) {
107 const Id shift_id{ctx.Const(2U)};
108 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
109 return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value);
110}
111
112Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) {
113 const Id shift_id{ctx.Const(2U)};
114 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
115 return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value);
116}
117
118Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
119 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
120}
121
122Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
123 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
124}
125
126Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
127 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
128}
129
130Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) {
131 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange);
132}
133
134Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
135 if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) {
136 const Id shift_id{ctx.Const(3U)};
137 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
138 const Id pointer{
139 ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
140 const auto [scope, semantics]{AtomicArgs(ctx)};
141 return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
142 }
143 LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to non-atomic");
144 const Id pointer_1{SharedPointer(ctx, offset, 0)};
145 const Id pointer_2{SharedPointer(ctx, offset, 1)};
146 const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
147 const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
148 const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
149 ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
150 ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
151 return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
152}
153
154Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
155 Id value) {
156 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
157}
158
159Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
160 Id value) {
161 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin);
162}
163
164Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
165 Id value) {
166 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin);
167}
168
169Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
170 Id value) {
171 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax);
172}
173
174Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
175 Id value) {
176 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax);
177}
178
179Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
180 Id value) {
181 const Id ssbo{ctx.ssbos[binding.U32()].U32};
182 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
183 return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
184}
185
186Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
187 Id value) {
188 const Id ssbo{ctx.ssbos[binding.U32()].U32};
189 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
190 return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
191}
192
193Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
194 Id value) {
195 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd);
196}
197
198Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
199 Id value) {
200 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr);
201}
202
203Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
204 Id value) {
205 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor);
206}
207
208Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
209 Id value) {
210 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange);
211}
212
213Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
214 Id value) {
215 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd,
216 &Sirit::Module::OpIAdd);
217}
218
219Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
220 Id value) {
221 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin,
222 &Sirit::Module::OpSMin);
223}
224
225Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
226 Id value) {
227 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin,
228 &Sirit::Module::OpUMin);
229}
230
231Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
232 Id value) {
233 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax,
234 &Sirit::Module::OpSMax);
235}
236
237Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
238 Id value) {
239 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax,
240 &Sirit::Module::OpUMax);
241}
242
243Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
244 Id value) {
245 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd,
246 &Sirit::Module::OpBitwiseAnd);
247}
248
249Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
250 Id value) {
251 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr,
252 &Sirit::Module::OpBitwiseOr);
253}
254
255Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
256 Id value) {
257 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor,
258 &Sirit::Module::OpBitwiseXor);
259}
260
261Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
262 Id value) {
263 if (ctx.profile.support_int64_atomics) {
264 const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
265 binding, offset, sizeof(u64))};
266 const auto [scope, semantics]{AtomicArgs(ctx)};
267 return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
268 }
269 LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to non-atomic");
270 const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
271 binding, offset, sizeof(u32[2]))};
272 const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
273 ctx.OpStore(pointer, value);
274 return original;
275}
276
277Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
278 Id value) {
279 const Id ssbo{ctx.ssbos[binding.U32()].U32};
280 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
281 return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
282}
283
284Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
285 Id value) {
286 const Id ssbo{ctx.ssbos[binding.U32()].U32};
287 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
288 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
289 return ctx.OpBitcast(ctx.U32[1], result);
290}
291
292Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
293 Id value) {
294 const Id ssbo{ctx.ssbos[binding.U32()].U32};
295 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
296 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
297 return ctx.OpPackHalf2x16(ctx.U32[1], result);
298}
299
300Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
301 Id value) {
302 const Id ssbo{ctx.ssbos[binding.U32()].U32};
303 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
304 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
305 return ctx.OpBitcast(ctx.U32[1], result);
306}
307
308Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
309 Id value) {
310 const Id ssbo{ctx.ssbos[binding.U32()].U32};
311 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
312 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
313 return ctx.OpPackHalf2x16(ctx.U32[1], result);
314}
315
316Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
317 Id value) {
318 const Id ssbo{ctx.ssbos[binding.U32()].U32};
319 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
320 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
321 return ctx.OpBitcast(ctx.U32[1], result);
322}
323
324Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
325 Id value) {
326 const Id ssbo{ctx.ssbos[binding.U32()].U32};
327 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
328 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
329 return ctx.OpPackHalf2x16(ctx.U32[1], result);
330}
331
332Id EmitGlobalAtomicIAdd32(EmitContext&) {
333 throw NotImplementedException("SPIR-V Instruction");
334}
335
336Id EmitGlobalAtomicSMin32(EmitContext&) {
337 throw NotImplementedException("SPIR-V Instruction");
338}
339
340Id EmitGlobalAtomicUMin32(EmitContext&) {
341 throw NotImplementedException("SPIR-V Instruction");
342}
343
344Id EmitGlobalAtomicSMax32(EmitContext&) {
345 throw NotImplementedException("SPIR-V Instruction");
346}
347
348Id EmitGlobalAtomicUMax32(EmitContext&) {
349 throw NotImplementedException("SPIR-V Instruction");
350}
351
352Id EmitGlobalAtomicInc32(EmitContext&) {
353 throw NotImplementedException("SPIR-V Instruction");
354}
355
356Id EmitGlobalAtomicDec32(EmitContext&) {
357 throw NotImplementedException("SPIR-V Instruction");
358}
359
360Id EmitGlobalAtomicAnd32(EmitContext&) {
361 throw NotImplementedException("SPIR-V Instruction");
362}
363
364Id EmitGlobalAtomicOr32(EmitContext&) {
365 throw NotImplementedException("SPIR-V Instruction");
366}
367
368Id EmitGlobalAtomicXor32(EmitContext&) {
369 throw NotImplementedException("SPIR-V Instruction");
370}
371
372Id EmitGlobalAtomicExchange32(EmitContext&) {
373 throw NotImplementedException("SPIR-V Instruction");
374}
375
376Id EmitGlobalAtomicIAdd64(EmitContext&) {
377 throw NotImplementedException("SPIR-V Instruction");
378}
379
380Id EmitGlobalAtomicSMin64(EmitContext&) {
381 throw NotImplementedException("SPIR-V Instruction");
382}
383
384Id EmitGlobalAtomicUMin64(EmitContext&) {
385 throw NotImplementedException("SPIR-V Instruction");
386}
387
388Id EmitGlobalAtomicSMax64(EmitContext&) {
389 throw NotImplementedException("SPIR-V Instruction");
390}
391
392Id EmitGlobalAtomicUMax64(EmitContext&) {
393 throw NotImplementedException("SPIR-V Instruction");
394}
395
396Id EmitGlobalAtomicInc64(EmitContext&) {
397 throw NotImplementedException("SPIR-V Instruction");
398}
399
400Id EmitGlobalAtomicDec64(EmitContext&) {
401 throw NotImplementedException("SPIR-V Instruction");
402}
403
404Id EmitGlobalAtomicAnd64(EmitContext&) {
405 throw NotImplementedException("SPIR-V Instruction");
406}
407
408Id EmitGlobalAtomicOr64(EmitContext&) {
409 throw NotImplementedException("SPIR-V Instruction");
410}
411
412Id EmitGlobalAtomicXor64(EmitContext&) {
413 throw NotImplementedException("SPIR-V Instruction");
414}
415
416Id EmitGlobalAtomicExchange64(EmitContext&) {
417 throw NotImplementedException("SPIR-V Instruction");
418}
419
420Id EmitGlobalAtomicAddF32(EmitContext&) {
421 throw NotImplementedException("SPIR-V Instruction");
422}
423
424Id EmitGlobalAtomicAddF16x2(EmitContext&) {
425 throw NotImplementedException("SPIR-V Instruction");
426}
427
428Id EmitGlobalAtomicAddF32x2(EmitContext&) {
429 throw NotImplementedException("SPIR-V Instruction");
430}
431
432Id EmitGlobalAtomicMinF16x2(EmitContext&) {
433 throw NotImplementedException("SPIR-V Instruction");
434}
435
436Id EmitGlobalAtomicMinF32x2(EmitContext&) {
437 throw NotImplementedException("SPIR-V Instruction");
438}
439
440Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
441 throw NotImplementedException("SPIR-V Instruction");
442}
443
444Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
445 throw NotImplementedException("SPIR-V Instruction");
446}
447
448} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
new file mode 100644
index 000000000..e0b52a001
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10namespace {
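// Emits OpMemoryBarrier with acquire/release ordering over every storage class the
// shader may touch: uniform, workgroup, atomic counter and image memory.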
11void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
12 const auto semantics{
13 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
14 spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
15 spv::MemorySemanticsMask::ImageMemory};
16 ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics)));
17}
18} // Anonymous namespace
19
20void EmitBarrier(EmitContext& ctx) {
21 const auto execution{spv::Scope::Workgroup};
22 const auto memory{spv::Scope::Workgroup};
23 const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
24 spv::MemorySemanticsMask::WorkgroupMemory};
25 ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)),
26 ctx.Const(static_cast<u32>(memory)),
27 ctx.Const(static_cast<u32>(memory_semantics)));
28}
29
30void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
31 MemoryBarrier(ctx, spv::Scope::Workgroup);
32}
33
34void EmitDeviceMemoryBarrier(EmitContext& ctx) {
35 MemoryBarrier(ctx, spv::Scope::Device);
36}
37
38} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
new file mode 100644
index 000000000..bb11f4f4e
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10void EmitBitCastU16F16(EmitContext&) {
11 throw NotImplementedException("SPIR-V Instruction");
12}
13
14Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
15 return ctx.OpBitcast(ctx.U32[1], value);
16}
17
18void EmitBitCastU64F64(EmitContext&) {
19 throw NotImplementedException("SPIR-V Instruction");
20}
21
22void EmitBitCastF16U16(EmitContext&) {
23 throw NotImplementedException("SPIR-V Instruction");
24}
25
26Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
27 return ctx.OpBitcast(ctx.F32[1], value);
28}
29
30void EmitBitCastF64U64(EmitContext&) {
31 throw NotImplementedException("SPIR-V Instruction");
32}
33
34Id EmitPackUint2x32(EmitContext& ctx, Id value) {
35 return ctx.OpBitcast(ctx.U64, value);
36}
37
38Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
39 return ctx.OpBitcast(ctx.U32[2], value);
40}
41
42Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
43 return ctx.OpBitcast(ctx.U32[1], value);
44}
45
46Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
47 return ctx.OpBitcast(ctx.F16[2], value);
48}
49
50Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
51 return ctx.OpPackHalf2x16(ctx.U32[1], value);
52}
53
54Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
55 return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
56}
57
58Id EmitPackDouble2x32(EmitContext& ctx, Id value) {
59 return ctx.OpBitcast(ctx.F64[1], value);
60}
61
62Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) {
63 return ctx.OpBitcast(ctx.U32[2], value);
64}
65
66} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
new file mode 100644
index 000000000..10ff4ecab
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -0,0 +1,155 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10
11Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
12 return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
13}
14
15Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
16 return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
17}
18
19Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
20 return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
21}
22
23Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
24 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
25}
26
27Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
28 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
29}
30
31Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
32 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
33}
34
35Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
36 return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
37}
38
39Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
40 return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
41}
42
43Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
44 return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
45}
46
47Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
48 return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
49}
50
51Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
52 return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
53}
54
55Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
56 return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
57}
58
59Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
60 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
61}
62
63Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
64 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
65}
66
67Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
68 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
69}
70
71Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
72 return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
73}
74
75Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
76 return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
77}
78
79Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
80 return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
81}
82
83Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
84 return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
85}
86
87Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
88 return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
89}
90
91Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
92 return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
93}
94
95Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
96 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
97}
98
99Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
100 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
101}
102
103Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
104 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
105}
106
107Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
108 return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
109}
110
111Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
112 return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
113}
114
115Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
116 return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
117}
118
119void EmitCompositeConstructF64x2(EmitContext&) {
120 throw NotImplementedException("SPIR-V Instruction");
121}
122
123void EmitCompositeConstructF64x3(EmitContext&) {
124 throw NotImplementedException("SPIR-V Instruction");
125}
126
127void EmitCompositeConstructF64x4(EmitContext&) {
128 throw NotImplementedException("SPIR-V Instruction");
129}
130
131void EmitCompositeExtractF64x2(EmitContext&) {
132 throw NotImplementedException("SPIR-V Instruction");
133}
134
135void EmitCompositeExtractF64x3(EmitContext&) {
136 throw NotImplementedException("SPIR-V Instruction");
137}
138
139void EmitCompositeExtractF64x4(EmitContext&) {
140 throw NotImplementedException("SPIR-V Instruction");
141}
142
143Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
144 return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
145}
146
147Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
148 return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
149}
150
151Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
152 return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
153}
154
155} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
new file mode 100644
index 000000000..fb8c02a77
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -0,0 +1,505 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6#include <utility>
7
8#include "shader_recompiler/backend/spirv/emit_spirv.h"
9#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
10
11namespace Shader::Backend::SPIRV {
12namespace {
13struct AttrInfo {
14 Id pointer;
15 Id id;
16 bool needs_cast;
17};
18
19std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
20 const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
21 switch (type) {
22 case AttributeType::Float:
23 return AttrInfo{ctx.input_f32, ctx.F32[1], false};
24 case AttributeType::UnsignedInt:
25 return AttrInfo{ctx.input_u32, ctx.U32[1], true};
26 case AttributeType::SignedInt:
27 return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
28 case AttributeType::Disabled:
29 return std::nullopt;
30 }
31 throw InvalidArgument("Invalid attribute type {}", type);
32}
33
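// Tessellation and geometry stages receive per-vertex inputs as arrays, so the vertex
// index becomes the first index of the access chain; other stages address the input
// attribute directly.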
34template <typename... Args>
35Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
36 switch (ctx.stage) {
37 case Stage::TessellationControl:
38 case Stage::TessellationEval:
39 case Stage::Geometry:
40 return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...);
41 default:
42 return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...);
43 }
44}
45
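// Tessellation control outputs are arrayed per invocation, so stores are routed
// through the current invocation id; every other stage writes the output directly.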
46template <typename... Args>
47Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
48 if (ctx.stage == Stage::TessellationControl) {
49 const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)};
50 return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...);
51 } else {
52 return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...);
53 }
54}
55
56struct OutAttr {
57 OutAttr(Id pointer_) : pointer{pointer_} {}
58 OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {}
59
60 Id pointer{};
61 Id type{};
62};
63
64std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
65 if (IR::IsGeneric(attr)) {
66 const u32 index{IR::GenericAttributeIndex(attr)};
67 const u32 element{IR::GenericAttributeElement(attr)};
68 const GenericElementInfo& info{ctx.output_generics.at(index).at(element)};
69 if (info.num_components == 1) {
70 return info.id;
71 } else {
72 const u32 index_element{element - info.first_element};
73 const Id index_id{ctx.Const(index_element)};
74 return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
75 }
76 }
77 switch (attr) {
78 case IR::Attribute::PointSize:
79 return ctx.output_point_size;
80 case IR::Attribute::PositionX:
81 case IR::Attribute::PositionY:
82 case IR::Attribute::PositionZ:
83 case IR::Attribute::PositionW: {
84 const u32 element{static_cast<u32>(attr) % 4};
85 const Id element_id{ctx.Const(element)};
86 return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
87 }
88 case IR::Attribute::ClipDistance0:
89 case IR::Attribute::ClipDistance1:
90 case IR::Attribute::ClipDistance2:
91 case IR::Attribute::ClipDistance3:
92 case IR::Attribute::ClipDistance4:
93 case IR::Attribute::ClipDistance5:
94 case IR::Attribute::ClipDistance6:
95 case IR::Attribute::ClipDistance7: {
96 const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
97 const u32 index{static_cast<u32>(attr) - base};
98 const Id clip_num{ctx.Const(index)};
99 return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num);
100 }
101 case IR::Attribute::Layer:
102 if (ctx.profile.support_viewport_index_layer_non_geometry ||
103 ctx.stage == Shader::Stage::Geometry) {
104 return OutAttr{ctx.layer, ctx.U32[1]};
105 }
106 return std::nullopt;
107 case IR::Attribute::ViewportIndex:
108 if (ctx.profile.support_viewport_index_layer_non_geometry ||
109 ctx.stage == Shader::Stage::Geometry) {
110 return OutAttr{ctx.viewport_index, ctx.U32[1]};
111 }
112 return std::nullopt;
113 case IR::Attribute::ViewportMask:
114 if (!ctx.profile.support_viewport_mask) {
115 return std::nullopt;
116 }
117 return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value),
118 ctx.U32[1]};
119 default:
120 throw NotImplementedException("Write attribute {}", attr);
121 }
122}
123
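// Loads a single element from a constant buffer. Immediate offsets are divided by the
// element size up front (see the aligned-read note below); dynamic offsets are shifted
// right by log2 of the element size instead.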
124Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
125 const IR::Value& binding, const IR::Value& offset) {
126 if (!binding.IsImmediate()) {
127 throw NotImplementedException("Constant buffer indexing");
128 }
129 const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
130 const Id uniform_type{ctx.uniform_types.*member_ptr};
131 if (!offset.IsImmediate()) {
132 Id index{ctx.Def(offset)};
133 if (element_size > 1) {
134 const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
135 const Id shift{ctx.Const(log2_element_size)};
136 index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
137 }
138 const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
139 return ctx.OpLoad(result_type, access_chain);
140 }
141 // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
142 const Id imm_offset{ctx.Const(offset.U32() / element_size)};
143 const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
144 return ctx.OpLoad(result_type, access_chain);
145}
146
147Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
148 return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
149}
150
151Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
152 return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
153}
154
155Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
156 if (offset.IsImmediate()) {
157 const u32 element{(offset.U32() / 4) % 4 + index_offset};
158 return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
159 }
160 const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
161 Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
162 if (index_offset > 0) {
163 element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
164 }
165 return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element);
166}
167} // Anonymous namespace
168
169void EmitGetRegister(EmitContext&) {
170 throw LogicError("Unreachable instruction");
171}
172
173void EmitSetRegister(EmitContext&) {
174 throw LogicError("Unreachable instruction");
175}
176
177void EmitGetPred(EmitContext&) {
178 throw LogicError("Unreachable instruction");
179}
180
181void EmitSetPred(EmitContext&) {
182 throw LogicError("Unreachable instruction");
183}
184
185void EmitSetGotoVariable(EmitContext&) {
186 throw LogicError("Unreachable instruction");
187}
188
189void EmitGetGotoVariable(EmitContext&) {
190 throw LogicError("Unreachable instruction");
191}
192
193void EmitSetIndirectBranchVariable(EmitContext&) {
194 throw LogicError("Unreachable instruction");
195}
196
197void EmitGetIndirectBranchVariable(EmitContext&) {
198 throw LogicError("Unreachable instruction");
199}
200
201Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
202 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
203 const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
204 return ctx.OpUConvert(ctx.U32[1], load);
205 }
206 Id element{};
207 if (ctx.profile.support_descriptor_aliasing) {
208 element = GetCbufU32(ctx, binding, offset);
209 } else {
210 const Id vector{GetCbufU32x4(ctx, binding, offset)};
211 element = GetCbufElement(ctx, vector, offset, 0u);
212 }
213 const Id bit_offset{ctx.BitOffset8(offset)};
214 return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
215}
216
217Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
218 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
219 const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
220 return ctx.OpSConvert(ctx.U32[1], load);
221 }
222 Id element{};
223 if (ctx.profile.support_descriptor_aliasing) {
224 element = GetCbufU32(ctx, binding, offset);
225 } else {
226 const Id vector{GetCbufU32x4(ctx, binding, offset)};
227 element = GetCbufElement(ctx, vector, offset, 0u);
228 }
229 const Id bit_offset{ctx.BitOffset8(offset)};
230 return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
231}
232
233Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
234 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
235 const Id load{
236 GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
237 return ctx.OpUConvert(ctx.U32[1], load);
238 }
239 Id element{};
240 if (ctx.profile.support_descriptor_aliasing) {
241 element = GetCbufU32(ctx, binding, offset);
242 } else {
243 const Id vector{GetCbufU32x4(ctx, binding, offset)};
244 element = GetCbufElement(ctx, vector, offset, 0u);
245 }
246 const Id bit_offset{ctx.BitOffset16(offset)};
247 return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
248}
249
250Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
251 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
252 const Id load{
253 GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
254 return ctx.OpSConvert(ctx.U32[1], load);
255 }
256 Id element{};
257 if (ctx.profile.support_descriptor_aliasing) {
258 element = GetCbufU32(ctx, binding, offset);
259 } else {
260 const Id vector{GetCbufU32x4(ctx, binding, offset)};
261 element = GetCbufElement(ctx, vector, offset, 0u);
262 }
263 const Id bit_offset{ctx.BitOffset16(offset)};
264 return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
265}
266
267Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
268 if (ctx.profile.support_descriptor_aliasing) {
269 return GetCbufU32(ctx, binding, offset);
270 } else {
271 const Id vector{GetCbufU32x4(ctx, binding, offset)};
272 return GetCbufElement(ctx, vector, offset, 0u);
273 }
274}
275
276Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
277 if (ctx.profile.support_descriptor_aliasing) {
278 return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
279 } else {
280 const Id vector{GetCbufU32x4(ctx, binding, offset)};
281 return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
282 }
283}
284
285Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
286 if (ctx.profile.support_descriptor_aliasing) {
287 return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
288 offset);
289 } else {
290 const Id vector{GetCbufU32x4(ctx, binding, offset)};
291 return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),
292 GetCbufElement(ctx, vector, offset, 1u));
293 }
294}
295
296Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
297 const u32 element{static_cast<u32>(attr) % 4};
298 if (IR::IsGeneric(attr)) {
299 const u32 index{IR::GenericAttributeIndex(attr)};
300 const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
301 if (!type) {
302 // Attribute is disabled
303 return ctx.Const(element == 3 ? 1.0f : 0.0f);
304 }
305 if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
306 // Varying component is not written
307 return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
308 }
309 const Id generic_id{ctx.input_generics.at(index)};
310 const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
311 const Id value{ctx.OpLoad(type->id, pointer)};
312 return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
313 }
314 switch (attr) {
315 case IR::Attribute::PrimitiveId:
316 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
317 case IR::Attribute::PositionX:
318 case IR::Attribute::PositionY:
319 case IR::Attribute::PositionZ:
320 case IR::Attribute::PositionW:
321 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
322 ctx.Const(element)));
323 case IR::Attribute::InstanceId:
324 if (ctx.profile.support_vertex_instance_id) {
325 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
326 } else {
327 const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
328 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
329 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
330 }
331 case IR::Attribute::VertexId:
332 if (ctx.profile.support_vertex_instance_id) {
333 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id));
334 } else {
335 const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
336 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
337 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
338 }
339 case IR::Attribute::FrontFace:
340 return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
341 ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
342 case IR::Attribute::PointSpriteS:
343 return ctx.OpLoad(ctx.F32[1],
344 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
345 case IR::Attribute::PointSpriteT:
346 return ctx.OpLoad(ctx.F32[1],
347 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U)));
348 case IR::Attribute::TessellationEvaluationPointU:
349 return ctx.OpLoad(ctx.F32[1],
350 ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
351 case IR::Attribute::TessellationEvaluationPointV:
352 return ctx.OpLoad(ctx.F32[1],
353 ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U)));
354
355 default:
356 throw NotImplementedException("Read attribute {}", attr);
357 }
358}
359
360void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
361 const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
362 if (!output) {
363 return;
364 }
365 if (Sirit::ValidId(output->type)) {
366 value = ctx.OpBitcast(output->type, value);
367 }
368 ctx.OpStore(output->pointer, value);
369}
370
371Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) {
372 switch (ctx.stage) {
373 case Stage::TessellationControl:
374 case Stage::TessellationEval:
375 case Stage::Geometry:
376 return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex);
377 default:
378 return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
379 }
380}
381
382void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) {
383 ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
384}
385
386Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
387 if (!IR::IsGeneric(patch)) {
388 throw NotImplementedException("Non-generic patch load");
389 }
390 const u32 index{IR::GenericPatchIndex(patch)};
391 const Id element{ctx.Const(IR::GenericPatchElement(patch))};
392 const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32};
393 const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
394 return ctx.OpLoad(ctx.F32[1], pointer);
395}
396
397void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
398 const Id pointer{[&] {
399 if (IR::IsGeneric(patch)) {
400 const u32 index{IR::GenericPatchIndex(patch)};
401 const Id element{ctx.Const(IR::GenericPatchElement(patch))};
402 return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
403 }
404 switch (patch) {
405 case IR::Patch::TessellationLodLeft:
406 case IR::Patch::TessellationLodRight:
407 case IR::Patch::TessellationLodTop:
408 case IR::Patch::TessellationLodBottom: {
409 const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
410 const Id index_id{ctx.Const(index)};
411 return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
412 }
413 case IR::Patch::TessellationLodInteriorU:
414 return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
415 ctx.u32_zero_value);
416 case IR::Patch::TessellationLodInteriorV:
417 return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u));
418 default:
419 throw NotImplementedException("Patch {}", patch);
420 }
421 }()};
422 ctx.OpStore(pointer, value);
423}
424
425void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
426 const Id component_id{ctx.Const(component)};
427 const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
428 ctx.OpStore(pointer, value);
429}
430
431void EmitSetSampleMask(EmitContext& ctx, Id value) {
432 ctx.OpStore(ctx.sample_mask, value);
433}
434
435void EmitSetFragDepth(EmitContext& ctx, Id value) {
436 ctx.OpStore(ctx.frag_depth, value);
437}
438
439void EmitGetZFlag(EmitContext&) {
440 throw NotImplementedException("SPIR-V Instruction");
441}
442
443void EmitGetSFlag(EmitContext&) {
444 throw NotImplementedException("SPIR-V Instruction");
445}
446
447void EmitGetCFlag(EmitContext&) {
448 throw NotImplementedException("SPIR-V Instruction");
449}
450
451void EmitGetOFlag(EmitContext&) {
452 throw NotImplementedException("SPIR-V Instruction");
453}
454
455void EmitSetZFlag(EmitContext&) {
456 throw NotImplementedException("SPIR-V Instruction");
457}
458
459void EmitSetSFlag(EmitContext&) {
460 throw NotImplementedException("SPIR-V Instruction");
461}
462
463void EmitSetCFlag(EmitContext&) {
464 throw NotImplementedException("SPIR-V Instruction");
465}
466
467void EmitSetOFlag(EmitContext&) {
468 throw NotImplementedException("SPIR-V Instruction");
469}
470
471Id EmitWorkgroupId(EmitContext& ctx) {
472 return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id);
473}
474
475Id EmitLocalInvocationId(EmitContext& ctx) {
476 return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
477}
478
479Id EmitInvocationId(EmitContext& ctx) {
480 return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
481}
482
483Id EmitSampleId(EmitContext& ctx) {
484 return ctx.OpLoad(ctx.U32[1], ctx.sample_id);
485}
486
487Id EmitIsHelperInvocation(EmitContext& ctx) {
488 return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation);
489}
490
491Id EmitYDirection(EmitContext& ctx) {
492 return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
493}
494
495Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
496 const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
497 return ctx.OpLoad(ctx.U32[1], pointer);
498}
499
500void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
501 const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
502 ctx.OpStore(pointer, value);
503}
504
505} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
new file mode 100644
index 000000000..d33486f28
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10void EmitJoin(EmitContext&) {
11 throw NotImplementedException("Join shouldn't be emitted");
12}
13
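// When the demote extension is unavailable the invocation is terminated with OpKill
// inside an always-taken conditional branch; the merge block is unreachable at
// runtime but keeps the control flow structured.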
14void EmitDemoteToHelperInvocation(EmitContext& ctx) {
15 if (ctx.profile.support_demote_to_helper_invocation) {
16 ctx.OpDemoteToHelperInvocationEXT();
17 } else {
18 const Id kill_label{ctx.OpLabel()};
19 const Id impossible_label{ctx.OpLabel()};
20 ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone);
21 ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label);
22 ctx.AddLabel(kill_label);
23 ctx.OpKill();
24 ctx.AddLabel(impossible_label);
25 }
26}
27
28} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
new file mode 100644
index 000000000..fd42b7a16
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -0,0 +1,269 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
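// Helpers for hosts without native 8/16-bit integer types: the narrow value stays in
// a u32 and is truncated or sign-extended with BitFieldExtract instead of a type
// conversion.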
10Id ExtractU16(EmitContext& ctx, Id value) {
11 if (ctx.profile.support_int16) {
12 return ctx.OpUConvert(ctx.U16, value);
13 } else {
14 return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
15 }
16}
17
18Id ExtractS16(EmitContext& ctx, Id value) {
19 if (ctx.profile.support_int16) {
20 return ctx.OpSConvert(ctx.S16, value);
21 } else {
22 return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
23 }
24}
25
26Id ExtractU8(EmitContext& ctx, Id value) {
27 if (ctx.profile.support_int8) {
28 return ctx.OpUConvert(ctx.U8, value);
29 } else {
30 return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
31 }
32}
33
34Id ExtractS8(EmitContext& ctx, Id value) {
35 if (ctx.profile.support_int8) {
36 return ctx.OpSConvert(ctx.S8, value);
37 } else {
38 return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
39 }
40}
41} // Anonymous namespace
42
43Id EmitConvertS16F16(EmitContext& ctx, Id value) {
44 if (ctx.profile.support_int16) {
45 return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
46 } else {
47 return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
48 }
49}
50
51Id EmitConvertS16F32(EmitContext& ctx, Id value) {
52 if (ctx.profile.support_int16) {
53 return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
54 } else {
55 return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
56 }
57}
58
59Id EmitConvertS16F64(EmitContext& ctx, Id value) {
60 if (ctx.profile.support_int16) {
61 return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
62 } else {
63 return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
64 }
65}
66
67Id EmitConvertS32F16(EmitContext& ctx, Id value) {
68 return ctx.OpConvertFToS(ctx.U32[1], value);
69}
70
71Id EmitConvertS32F32(EmitContext& ctx, Id value) {
72 if (ctx.profile.has_broken_signed_operations) {
73 return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
74 } else {
75 return ctx.OpConvertFToS(ctx.U32[1], value);
76 }
77}
78
79Id EmitConvertS32F64(EmitContext& ctx, Id value) {
80 return ctx.OpConvertFToS(ctx.U32[1], value);
81}
82
83Id EmitConvertS64F16(EmitContext& ctx, Id value) {
84 return ctx.OpConvertFToS(ctx.U64, value);
85}
86
87Id EmitConvertS64F32(EmitContext& ctx, Id value) {
88 return ctx.OpConvertFToS(ctx.U64, value);
89}
90
91Id EmitConvertS64F64(EmitContext& ctx, Id value) {
92 return ctx.OpConvertFToS(ctx.U64, value);
93}
94
95Id EmitConvertU16F16(EmitContext& ctx, Id value) {
96 if (ctx.profile.support_int16) {
97 return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
98 } else {
99 return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
100 }
101}
102
103Id EmitConvertU16F32(EmitContext& ctx, Id value) {
104 if (ctx.profile.support_int16) {
105 return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
106 } else {
107 return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
108 }
109}
110
111Id EmitConvertU16F64(EmitContext& ctx, Id value) {
112 if (ctx.profile.support_int16) {
113 return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
114 } else {
115 return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
116 }
117}
118
119Id EmitConvertU32F16(EmitContext& ctx, Id value) {
120 return ctx.OpConvertFToU(ctx.U32[1], value);
121}
122
123Id EmitConvertU32F32(EmitContext& ctx, Id value) {
124 return ctx.OpConvertFToU(ctx.U32[1], value);
125}
126
127Id EmitConvertU32F64(EmitContext& ctx, Id value) {
128 return ctx.OpConvertFToU(ctx.U32[1], value);
129}
130
131Id EmitConvertU64F16(EmitContext& ctx, Id value) {
132 return ctx.OpConvertFToU(ctx.U64, value);
133}
134
135Id EmitConvertU64F32(EmitContext& ctx, Id value) {
136 return ctx.OpConvertFToU(ctx.U64, value);
137}
138
139Id EmitConvertU64F64(EmitContext& ctx, Id value) {
140 return ctx.OpConvertFToU(ctx.U64, value);
141}
142
143Id EmitConvertU64U32(EmitContext& ctx, Id value) {
144 return ctx.OpUConvert(ctx.U64, value);
145}
146
147Id EmitConvertU32U64(EmitContext& ctx, Id value) {
148 return ctx.OpUConvert(ctx.U32[1], value);
149}
150
151Id EmitConvertF16F32(EmitContext& ctx, Id value) {
152 return ctx.OpFConvert(ctx.F16[1], value);
153}
154
155Id EmitConvertF32F16(EmitContext& ctx, Id value) {
156 return ctx.OpFConvert(ctx.F32[1], value);
157}
158
159Id EmitConvertF32F64(EmitContext& ctx, Id value) {
160 return ctx.OpFConvert(ctx.F32[1], value);
161}
162
163Id EmitConvertF64F32(EmitContext& ctx, Id value) {
164 return ctx.OpFConvert(ctx.F64[1], value);
165}
166
167Id EmitConvertF16S8(EmitContext& ctx, Id value) {
168 return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value));
169}
170
171Id EmitConvertF16S16(EmitContext& ctx, Id value) {
172 return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value));
173}
174
175Id EmitConvertF16S32(EmitContext& ctx, Id value) {
176 return ctx.OpConvertSToF(ctx.F16[1], value);
177}
178
179Id EmitConvertF16S64(EmitContext& ctx, Id value) {
180 return ctx.OpConvertSToF(ctx.F16[1], value);
181}
182
183Id EmitConvertF16U8(EmitContext& ctx, Id value) {
184 return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value));
185}
186
187Id EmitConvertF16U16(EmitContext& ctx, Id value) {
188 return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value));
189}
190
191Id EmitConvertF16U32(EmitContext& ctx, Id value) {
192 return ctx.OpConvertUToF(ctx.F16[1], value);
193}
194
195Id EmitConvertF16U64(EmitContext& ctx, Id value) {
196 return ctx.OpConvertUToF(ctx.F16[1], value);
197}
198
199Id EmitConvertF32S8(EmitContext& ctx, Id value) {
200 return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value));
201}
202
203Id EmitConvertF32S16(EmitContext& ctx, Id value) {
204 return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value));
205}
206
207Id EmitConvertF32S32(EmitContext& ctx, Id value) {
208 if (ctx.profile.has_broken_signed_operations) {
209 value = ctx.OpBitcast(ctx.S32[1], value);
210 }
211 return ctx.OpConvertSToF(ctx.F32[1], value);
212}
213
214Id EmitConvertF32S64(EmitContext& ctx, Id value) {
215 return ctx.OpConvertSToF(ctx.F32[1], value);
216}
217
218Id EmitConvertF32U8(EmitContext& ctx, Id value) {
219 return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value));
220}
221
222Id EmitConvertF32U16(EmitContext& ctx, Id value) {
223 return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value));
224}
225
226Id EmitConvertF32U32(EmitContext& ctx, Id value) {
227 return ctx.OpConvertUToF(ctx.F32[1], value);
228}
229
230Id EmitConvertF32U64(EmitContext& ctx, Id value) {
231 return ctx.OpConvertUToF(ctx.F32[1], value);
232}
233
234Id EmitConvertF64S8(EmitContext& ctx, Id value) {
235 return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value));
236}
237
238Id EmitConvertF64S16(EmitContext& ctx, Id value) {
239 return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value));
240}
241
242Id EmitConvertF64S32(EmitContext& ctx, Id value) {
243 if (ctx.profile.has_broken_signed_operations) {
244 value = ctx.OpBitcast(ctx.S32[1], value);
245 }
246 return ctx.OpConvertSToF(ctx.F64[1], value);
247}
248
249Id EmitConvertF64S64(EmitContext& ctx, Id value) {
250 return ctx.OpConvertSToF(ctx.F64[1], value);
251}
252
253Id EmitConvertF64U8(EmitContext& ctx, Id value) {
254 return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value));
255}
256
257Id EmitConvertF64U16(EmitContext& ctx, Id value) {
258 return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value));
259}
260
261Id EmitConvertF64U32(EmitContext& ctx, Id value) {
262 return ctx.OpConvertUToF(ctx.F64[1], value);
263}
264
265Id EmitConvertF64U64(EmitContext& ctx, Id value) {
266 return ctx.OpConvertUToF(ctx.F64[1], value);
267}
268
269} // namespace Shader::Backend::SPIRV
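
The Extract* fallbacks above only matter when the host lacks native 8-bit/16-bit integers: the value then stays in a 32-bit register and OpBitFieldUExtract/OpBitFieldSExtract truncate it, sign-extending for the signed variants. A minimal standalone C++ sketch of the equivalent integer behaviour, for illustration only (not emitter code):

#include <cstdint>

// Zero-extend the low 16 bits of a 32-bit register, matching
// OpBitFieldUExtract(value, offset=0, count=16).
uint32_t ExtractU16Fallback(uint32_t value) {
    return value & 0xffffu;
}

// Sign-extend the low 16 bits, matching OpBitFieldSExtract(value, offset=0, count=16).
uint32_t ExtractS16Fallback(uint32_t value) {
    uint32_t low = value & 0xffffu;
    if (low & 0x8000u) {
        low |= 0xffff0000u; // replicate the sign bit into the upper half
    }
    return low;
}

The has_broken_signed_operations paths in EmitConvertS32F32/EmitConvertF32S32 follow the same idea: data stays in unsigned registers and is only bitcast through a signed type around the conversion itself.
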
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
new file mode 100644
index 000000000..61cf25f9c
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -0,0 +1,396 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10namespace {
11Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
12 const auto flags{inst->Flags<IR::FpControl>()};
13 if (flags.no_contraction) {
14 ctx.Decorate(op, spv::Decoration::NoContraction);
15 }
16 return op;
17}
18
19Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
20 if (ctx.profile.has_broken_spirv_clamp) {
21 return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
22 } else {
23 return ctx.OpFClamp(type, value, zero, one);
24 }
25}
26
27Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) {
28 if (ctx.profile.ignore_nan_fp_comparisons) {
29        const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)};
30 const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))};
31 const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))};
32 return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan);
33 } else {
34 return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
35 }
36}
37
38Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) {
39 if (ctx.profile.ignore_nan_fp_comparisons) {
40 const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)};
41 const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)};
42 const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)};
43 return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan);
44 } else {
45 return (ctx.*comp_func)(ctx.U1, lhs, rhs);
46 }
47}
48} // Anonymous namespace
49
50Id EmitFPAbs16(EmitContext& ctx, Id value) {
51 return ctx.OpFAbs(ctx.F16[1], value);
52}
53
54Id EmitFPAbs32(EmitContext& ctx, Id value) {
55 return ctx.OpFAbs(ctx.F32[1], value);
56}
57
58Id EmitFPAbs64(EmitContext& ctx, Id value) {
59 return ctx.OpFAbs(ctx.F64[1], value);
60}
61
62Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
63 return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
64}
65
66Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
67 return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
68}
69
70Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
71 return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
72}
73
74Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
75 return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
76}
77
78Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
79 return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
80}
81
82Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
83 return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
84}
85
86Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
87 return ctx.OpFMax(ctx.F32[1], a, b);
88}
89
90Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
91 return ctx.OpFMax(ctx.F64[1], a, b);
92}
93
94Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
95 return ctx.OpFMin(ctx.F32[1], a, b);
96}
97
98Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
99 return ctx.OpFMin(ctx.F64[1], a, b);
100}
101
102Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
103 return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
104}
105
106Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
107 return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
108}
109
110Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
111 return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
112}
113
114Id EmitFPNeg16(EmitContext& ctx, Id value) {
115 return ctx.OpFNegate(ctx.F16[1], value);
116}
117
118Id EmitFPNeg32(EmitContext& ctx, Id value) {
119 return ctx.OpFNegate(ctx.F32[1], value);
120}
121
122Id EmitFPNeg64(EmitContext& ctx, Id value) {
123 return ctx.OpFNegate(ctx.F64[1], value);
124}
125
126Id EmitFPSin(EmitContext& ctx, Id value) {
127 return ctx.OpSin(ctx.F32[1], value);
128}
129
130Id EmitFPCos(EmitContext& ctx, Id value) {
131 return ctx.OpCos(ctx.F32[1], value);
132}
133
134Id EmitFPExp2(EmitContext& ctx, Id value) {
135 return ctx.OpExp2(ctx.F32[1], value);
136}
137
138Id EmitFPLog2(EmitContext& ctx, Id value) {
139 return ctx.OpLog2(ctx.F32[1], value);
140}
141
142Id EmitFPRecip32(EmitContext& ctx, Id value) {
143 return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value);
144}
145
146Id EmitFPRecip64(EmitContext& ctx, Id value) {
147    return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], f64{1.0}), value);
148}
149
150Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) {
151 return ctx.OpInverseSqrt(ctx.F32[1], value);
152}
153
154Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) {
155 return ctx.OpInverseSqrt(ctx.F64[1], value);
156}
157
158Id EmitFPSqrt(EmitContext& ctx, Id value) {
159 return ctx.OpSqrt(ctx.F32[1], value);
160}
161
162Id EmitFPSaturate16(EmitContext& ctx, Id value) {
163 const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
164 const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
165 return Clamp(ctx, ctx.F16[1], value, zero, one);
166}
167
168Id EmitFPSaturate32(EmitContext& ctx, Id value) {
169 const Id zero{ctx.Const(f32{0.0})};
170 const Id one{ctx.Const(f32{1.0})};
171 return Clamp(ctx, ctx.F32[1], value, zero, one);
172}
173
174Id EmitFPSaturate64(EmitContext& ctx, Id value) {
175 const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
176 const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
177 return Clamp(ctx, ctx.F64[1], value, zero, one);
178}
179
180Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
181 return Clamp(ctx, ctx.F16[1], value, min_value, max_value);
182}
183
184Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
185 return Clamp(ctx, ctx.F32[1], value, min_value, max_value);
186}
187
188Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
189 return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
190}
191
192Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
193 return ctx.OpRoundEven(ctx.F16[1], value);
194}
195
196Id EmitFPRoundEven32(EmitContext& ctx, Id value) {
197 return ctx.OpRoundEven(ctx.F32[1], value);
198}
199
200Id EmitFPRoundEven64(EmitContext& ctx, Id value) {
201 return ctx.OpRoundEven(ctx.F64[1], value);
202}
203
204Id EmitFPFloor16(EmitContext& ctx, Id value) {
205 return ctx.OpFloor(ctx.F16[1], value);
206}
207
208Id EmitFPFloor32(EmitContext& ctx, Id value) {
209 return ctx.OpFloor(ctx.F32[1], value);
210}
211
212Id EmitFPFloor64(EmitContext& ctx, Id value) {
213 return ctx.OpFloor(ctx.F64[1], value);
214}
215
216Id EmitFPCeil16(EmitContext& ctx, Id value) {
217 return ctx.OpCeil(ctx.F16[1], value);
218}
219
220Id EmitFPCeil32(EmitContext& ctx, Id value) {
221 return ctx.OpCeil(ctx.F32[1], value);
222}
223
224Id EmitFPCeil64(EmitContext& ctx, Id value) {
225 return ctx.OpCeil(ctx.F64[1], value);
226}
227
228Id EmitFPTrunc16(EmitContext& ctx, Id value) {
229 return ctx.OpTrunc(ctx.F16[1], value);
230}
231
232Id EmitFPTrunc32(EmitContext& ctx, Id value) {
233 return ctx.OpTrunc(ctx.F32[1], value);
234}
235
236Id EmitFPTrunc64(EmitContext& ctx, Id value) {
237 return ctx.OpTrunc(ctx.F64[1], value);
238}
239
240Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
241 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
242}
243
244Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) {
245 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
246}
247
248Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) {
249 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
250}
251
252Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) {
253 return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
254}
255
256Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) {
257 return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
258}
259
260Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) {
261 return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
262}
263
264Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
265 return FPOrdNotEqual(ctx, lhs, rhs);
266}
267
268Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
269 return FPOrdNotEqual(ctx, lhs, rhs);
270}
271
272Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
273 return FPOrdNotEqual(ctx, lhs, rhs);
274}
275
276Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
277 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
278}
279
280Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
281 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
282}
283
284Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
285 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
286}
287
288Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
289 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
290}
291
292Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
293 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
294}
295
296Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
297 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
298}
299
300Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
301 return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
302}
303
304Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
305 return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
306}
307
308Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
309 return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
310}
311
312Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
313 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
314}
315
316Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
317 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
318}
319
320Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
321 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
322}
323
324Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
325 return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
326}
327
328Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
329 return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
330}
331
332Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
333 return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
334}
335
336Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
337 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
338}
339
340Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
341 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
342}
343
344Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
345 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
346}
347
348Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
349 return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
350}
351
352Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
353 return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
354}
355
356Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
357 return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
358}
359
360Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
361 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
362}
363
364Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
365 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
366}
367
368Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
369 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
370}
371
372Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
373 return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
374}
375
376Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
377 return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
378}
379
380Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
381 return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
382}
383
384Id EmitFPIsNan16(EmitContext& ctx, Id value) {
385 return ctx.OpIsNan(ctx.U1, value);
386}
387
388Id EmitFPIsNan32(EmitContext& ctx, Id value) {
389 return ctx.OpIsNan(ctx.U1, value);
390}
391
392Id EmitFPIsNan64(EmitContext& ctx, Id value) {
393 return ctx.OpIsNan(ctx.U1, value);
394}
395
396} // namespace Shader::Backend::SPIRV
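
The ignore_nan_fp_comparisons paths above rebuild comparisons from explicit OpIsNan checks for drivers whose NaN comparison behaviour is unreliable: unordered comparisons OR the NaN tests in, while ordered not-equal ANDs them out. A scalar C++ sketch of the same rewrite, purely for illustration:

#include <cmath>

// Unordered compare: also true when either operand is NaN.
bool UnordLessThan(float lhs, float rhs) {
    return (lhs < rhs) || std::isnan(lhs) || std::isnan(rhs);
}

// Ordered not-equal: only true when the values differ and neither operand is NaN.
bool OrdNotEqual(float lhs, float rhs) {
    return (lhs != rhs) && !std::isnan(lhs) && !std::isnan(rhs);
}
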
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
new file mode 100644
index 000000000..3588f052b
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -0,0 +1,462 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <boost/container/static_vector.hpp>
6
7#include "shader_recompiler/backend/spirv/emit_spirv.h"
8#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10
11namespace Shader::Backend::SPIRV {
12namespace {
13class ImageOperands {
14public:
15 explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
16 Id lod, const IR::Value& offset) {
17 if (has_bias) {
18 const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
19 Add(spv::ImageOperandsMask::Bias, bias);
20 }
21 if (has_lod) {
22 const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
23 Add(spv::ImageOperandsMask::Lod, lod_value);
24 }
25 AddOffset(ctx, offset);
26 if (has_lod_clamp) {
27 const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
28 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
29 }
30 }
31
32 explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) {
33 if (offset2.IsEmpty()) {
34 if (offset.IsEmpty()) {
35 return;
36 }
37 Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
38 return;
39 }
40 const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
41 if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
42 LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring");
43 return;
44 }
45 const IR::Opcode opcode{values[0]->GetOpcode()};
46 if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
47 throw LogicError("Invalid PTP arguments");
48 }
49 auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
50
51 const Id offsets{ctx.ConstantComposite(
52 ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)),
53 ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)),
54 ctx.Const(read(1, 2), read(1, 3)))};
55 Add(spv::ImageOperandsMask::ConstOffsets, offsets);
56 }
57
58 explicit ImageOperands(Id offset, Id lod, Id ms) {
59 if (Sirit::ValidId(lod)) {
60 Add(spv::ImageOperandsMask::Lod, lod);
61 }
62 if (Sirit::ValidId(offset)) {
63 Add(spv::ImageOperandsMask::Offset, offset);
64 }
65 if (Sirit::ValidId(ms)) {
66 Add(spv::ImageOperandsMask::Sample, ms);
67 }
68 }
69
70 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
71 Id offset, Id lod_clamp) {
72 if (!Sirit::ValidId(derivates)) {
73 throw LogicError("Derivates must be present");
74 }
75 boost::container::static_vector<Id, 3> deriv_x_accum;
76 boost::container::static_vector<Id, 3> deriv_y_accum;
77 for (u32 i = 0; i < num_derivates; ++i) {
78 deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
79 deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
80 }
81 const Id derivates_X{ctx.OpCompositeConstruct(
82 ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
83 const Id derivates_Y{ctx.OpCompositeConstruct(
84 ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
85 Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
86 if (Sirit::ValidId(offset)) {
87 Add(spv::ImageOperandsMask::Offset, offset);
88 }
89 if (has_lod_clamp) {
90 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
91 }
92 }
93
94 std::span<const Id> Span() const noexcept {
95 return std::span{operands.data(), operands.size()};
96 }
97
98 std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept {
99 return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt;
100 }
101
102 spv::ImageOperandsMask Mask() const noexcept {
103 return mask;
104 }
105
106private:
107 void AddOffset(EmitContext& ctx, const IR::Value& offset) {
108 if (offset.IsEmpty()) {
109 return;
110 }
111 if (offset.IsImmediate()) {
112 Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32())));
113 return;
114 }
115 IR::Inst* const inst{offset.InstRecursive()};
116 if (inst->AreAllArgsImmediates()) {
117 switch (inst->GetOpcode()) {
118 case IR::Opcode::CompositeConstructU32x2:
119 Add(spv::ImageOperandsMask::ConstOffset,
120 ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
121 static_cast<s32>(inst->Arg(1).U32())));
122 return;
123 case IR::Opcode::CompositeConstructU32x3:
124 Add(spv::ImageOperandsMask::ConstOffset,
125 ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
126 static_cast<s32>(inst->Arg(1).U32()),
127 static_cast<s32>(inst->Arg(2).U32())));
128 return;
129 case IR::Opcode::CompositeConstructU32x4:
130 Add(spv::ImageOperandsMask::ConstOffset,
131 ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
132 static_cast<s32>(inst->Arg(1).U32()),
133 static_cast<s32>(inst->Arg(2).U32()),
134 static_cast<s32>(inst->Arg(3).U32())));
135 return;
136 default:
137 break;
138 }
139 }
140 Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
141 }
142
143 void Add(spv::ImageOperandsMask new_mask, Id value) {
144 mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
145 static_cast<unsigned>(new_mask));
146 operands.push_back(value);
147 }
148
149 void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) {
150 mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
151 static_cast<unsigned>(new_mask));
152 operands.push_back(value_1);
153 operands.push_back(value_2);
154 }
155
156 boost::container::static_vector<Id, 4> operands;
157 spv::ImageOperandsMask mask{};
158};
159
160Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) {
161 const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
162 if (def.count > 1) {
163 const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))};
164 return ctx.OpLoad(def.sampled_type, pointer);
165 } else {
166 return ctx.OpLoad(def.sampled_type, def.id);
167 }
168}
169
170Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
171 if (!index.IsImmediate() || index.U32() != 0) {
172 throw NotImplementedException("Indirect image indexing");
173 }
174 if (info.type == TextureType::Buffer) {
175 const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)};
176 if (def.count > 1) {
177 throw NotImplementedException("Indirect texture sample");
178 }
179 const Id sampler_id{def.id};
180 const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
181 return ctx.OpImage(ctx.image_buffer_type, id);
182 } else {
183 const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
184 if (def.count > 1) {
185 throw NotImplementedException("Indirect texture sample");
186 }
187 return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id));
188 }
189}
190
191Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
192 if (!index.IsImmediate() || index.U32() != 0) {
193 throw NotImplementedException("Indirect image indexing");
194 }
195 if (info.type == TextureType::Buffer) {
196 const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)};
197 return ctx.OpLoad(def.image_type, def.id);
198 } else {
199 const ImageDefinition def{ctx.images.at(info.descriptor_index)};
200 return ctx.OpLoad(def.image_type, def.id);
201 }
202}
203
204Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
205 const auto info{inst->Flags<IR::TextureInstInfo>()};
206 if (info.relaxed_precision != 0) {
207 ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
208 }
209 return sample;
210}
211
212template <typename MethodPtrType, typename... Args>
213Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
214 Id result_type, Args&&... args) {
215 IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
216 if (!sparse) {
217 return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
218 }
219 const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
220 const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
221 const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
222 sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
223 sparse->Invalidate();
224 Decorate(ctx, inst, sample);
225 return ctx.OpCompositeExtract(result_type, sample, 1U);
226}
227} // Anonymous namespace
228
229Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
230 throw LogicError("Unreachable instruction");
231}
232
233Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
234 throw LogicError("Unreachable instruction");
235}
236
237Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
238 throw LogicError("Unreachable instruction");
239}
240
241Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
242 throw LogicError("Unreachable instruction");
243}
244
245Id EmitBindlessImageGather(EmitContext&) {
246 throw LogicError("Unreachable instruction");
247}
248
249Id EmitBindlessImageGatherDref(EmitContext&) {
250 throw LogicError("Unreachable instruction");
251}
252
253Id EmitBindlessImageFetch(EmitContext&) {
254 throw LogicError("Unreachable instruction");
255}
256
257Id EmitBindlessImageQueryDimensions(EmitContext&) {
258 throw LogicError("Unreachable instruction");
259}
260
261Id EmitBindlessImageQueryLod(EmitContext&) {
262 throw LogicError("Unreachable instruction");
263}
264
265Id EmitBindlessImageGradient(EmitContext&) {
266 throw LogicError("Unreachable instruction");
267}
268
269Id EmitBindlessImageRead(EmitContext&) {
270 throw LogicError("Unreachable instruction");
271}
272
273Id EmitBindlessImageWrite(EmitContext&) {
274 throw LogicError("Unreachable instruction");
275}
276
277Id EmitBoundImageSampleImplicitLod(EmitContext&) {
278 throw LogicError("Unreachable instruction");
279}
280
281Id EmitBoundImageSampleExplicitLod(EmitContext&) {
282 throw LogicError("Unreachable instruction");
283}
284
285Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
286 throw LogicError("Unreachable instruction");
287}
288
289Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
290 throw LogicError("Unreachable instruction");
291}
292
293Id EmitBoundImageGather(EmitContext&) {
294 throw LogicError("Unreachable instruction");
295}
296
297Id EmitBoundImageGatherDref(EmitContext&) {
298 throw LogicError("Unreachable instruction");
299}
300
301Id EmitBoundImageFetch(EmitContext&) {
302 throw LogicError("Unreachable instruction");
303}
304
305Id EmitBoundImageQueryDimensions(EmitContext&) {
306 throw LogicError("Unreachable instruction");
307}
308
309Id EmitBoundImageQueryLod(EmitContext&) {
310 throw LogicError("Unreachable instruction");
311}
312
313Id EmitBoundImageGradient(EmitContext&) {
314 throw LogicError("Unreachable instruction");
315}
316
317Id EmitBoundImageRead(EmitContext&) {
318 throw LogicError("Unreachable instruction");
319}
320
321Id EmitBoundImageWrite(EmitContext&) {
322 throw LogicError("Unreachable instruction");
323}
324
325Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
326 Id bias_lc, const IR::Value& offset) {
327 const auto info{inst->Flags<IR::TextureInstInfo>()};
328 if (ctx.stage == Stage::Fragment) {
329 const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
330 bias_lc, offset);
331 return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
332 &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
333 Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
334 } else {
335        // We can't use implicit lods on non-fragment stages in SPIR-V. Maxwell hardware behaves as
336        // if the lod were explicitly zero. This may change on Turing with implicit compute
337        // derivatives.
338 const Id lod{ctx.Const(0.0f)};
339 const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
340 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
341 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
342 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
343 }
344}
345
346Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
347 Id lod, const IR::Value& offset) {
348 const auto info{inst->Flags<IR::TextureInstInfo>()};
349 const ImageOperands operands(ctx, false, true, false, lod, offset);
350 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
351 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
352 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
353}
354
355Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
356 Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
357 const auto info{inst->Flags<IR::TextureInstInfo>()};
358 const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
359 offset);
360 return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
361 &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
362 Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span());
363}
364
365Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
366 Id coords, Id dref, Id lod, const IR::Value& offset) {
367 const auto info{inst->Flags<IR::TextureInstInfo>()};
368 const ImageOperands operands(ctx, false, true, false, lod, offset);
369 return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
370 &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
371 Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span());
372}
373
374Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
375 const IR::Value& offset, const IR::Value& offset2) {
376 const auto info{inst->Flags<IR::TextureInstInfo>()};
377 const ImageOperands operands(ctx, offset, offset2);
378 return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
379 ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
380 operands.MaskOptional(), operands.Span());
381}
382
383Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
384 const IR::Value& offset, const IR::Value& offset2, Id dref) {
385 const auto info{inst->Flags<IR::TextureInstInfo>()};
386 const ImageOperands operands(ctx, offset, offset2);
387 return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
388 ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
389 operands.Span());
390}
391
392Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
393 Id lod, Id ms) {
394 const auto info{inst->Flags<IR::TextureInstInfo>()};
395 if (info.type == TextureType::Buffer) {
396 lod = Id{};
397 }
398 const ImageOperands operands(offset, lod, ms);
399 return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
400 TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
401}
402
403Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
404 const auto info{inst->Flags<IR::TextureInstInfo>()};
405 const Id image{TextureImage(ctx, info, index)};
406 const Id zero{ctx.u32_zero_value};
407 const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
408 switch (info.type) {
409 case TextureType::Color1D:
410 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod),
411 zero, zero, mips());
412 case TextureType::ColorArray1D:
413 case TextureType::Color2D:
414 case TextureType::ColorCube:
415 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod),
416 zero, mips());
417 case TextureType::ColorArray2D:
418 case TextureType::Color3D:
419 case TextureType::ColorArrayCube:
420 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod),
421 mips());
422 case TextureType::Buffer:
423 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero,
424 zero, mips());
425 }
426 throw LogicError("Unspecified image type {}", info.type.Value());
427}
428
429Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
430 const auto info{inst->Flags<IR::TextureInstInfo>()};
431 const Id zero{ctx.f32_zero_value};
432 const Id sampler{Texture(ctx, info, index)};
433 return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
434 zero, zero);
435}
436
437Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
438 Id derivates, Id offset, Id lod_clamp) {
439 const auto info{inst->Flags<IR::TextureInstInfo>()};
440 const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates,
441 offset, lod_clamp);
442 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
443 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
444 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
445}
446
447Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
448 const auto info{inst->Flags<IR::TextureInstInfo>()};
449 if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
450 LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
451 return ctx.ConstantNull(ctx.U32[4]);
452 }
453 return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4],
454 Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{});
455}
456
457void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
458 const auto info{inst->Flags<IR::TextureInstInfo>()};
459 ctx.OpImageWrite(Image(ctx, index, info), coords, color);
460}
461
462} // namespace Shader::Backend::SPIRV
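
In the gather path, the second ImageOperands constructor above repacks the eight immediates carried by the two CompositeConstructU32x4 values into the four (x, y) texel offsets of a ConstOffsets operand. A small standalone sketch of that mapping (illustrative only, not the emitter's types):

#include <array>
#include <cstdint>

using Offset2D = std::array<uint32_t, 2>;

// read(0, i) walks the first composite, read(1, i) the second; pairs are taken in order.
std::array<Offset2D, 4> PackConstOffsets(const std::array<uint32_t, 4>& first,
                                         const std::array<uint32_t, 4>& second) {
    return {{
        {first[0], first[1]},
        {first[2], first[3]},
        {second[0], second[1]},
        {second[2], second[3]},
    }};
}
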
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
new file mode 100644
index 000000000..d7f1a365a
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
@@ -0,0 +1,183 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10namespace {
11Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
12 if (!index.IsImmediate()) {
13 throw NotImplementedException("Indirect image indexing");
14 }
15 if (info.type == TextureType::Buffer) {
16 const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())};
17 return def.id;
18 } else {
19 const ImageDefinition def{ctx.images.at(index.U32())};
20 return def.id;
21 }
22}
23
24std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
25 const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
26 const Id semantics{ctx.u32_zero_value};
27 return {scope, semantics};
28}
29
30Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
31 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
32 const auto info{inst->Flags<IR::TextureInstInfo>()};
33 const Id image{Image(ctx, index, info)};
34 const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
35 const auto [scope, semantics]{AtomicArgs(ctx)};
36 return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
37}
38} // Anonymous namespace
39
40Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
41 Id value) {
42 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd);
43}
44
45Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
46 Id value) {
47 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin);
48}
49
50Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
51 Id value) {
52 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin);
53}
54
55Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
56 Id value) {
57 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax);
58}
59
60Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
61 Id value) {
62 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax);
63}
64
65Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
66 // TODO: This is not yet implemented
67 throw NotImplementedException("SPIR-V Instruction");
68}
69
70Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
71 // TODO: This is not yet implemented
72 throw NotImplementedException("SPIR-V Instruction");
73}
74
75Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
76 Id value) {
77 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd);
78}
79
80Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
81 Id value) {
82 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr);
83}
84
85Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
86 Id value) {
87 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor);
88}
89
90Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
91 Id value) {
92 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange);
93}
94
95Id EmitBindlessImageAtomicIAdd32(EmitContext&) {
96 throw NotImplementedException("SPIR-V Instruction");
97}
98
99Id EmitBindlessImageAtomicSMin32(EmitContext&) {
100 throw NotImplementedException("SPIR-V Instruction");
101}
102
103Id EmitBindlessImageAtomicUMin32(EmitContext&) {
104 throw NotImplementedException("SPIR-V Instruction");
105}
106
107Id EmitBindlessImageAtomicSMax32(EmitContext&) {
108 throw NotImplementedException("SPIR-V Instruction");
109}
110
111Id EmitBindlessImageAtomicUMax32(EmitContext&) {
112 throw NotImplementedException("SPIR-V Instruction");
113}
114
115Id EmitBindlessImageAtomicInc32(EmitContext&) {
116 throw NotImplementedException("SPIR-V Instruction");
117}
118
119Id EmitBindlessImageAtomicDec32(EmitContext&) {
120 throw NotImplementedException("SPIR-V Instruction");
121}
122
123Id EmitBindlessImageAtomicAnd32(EmitContext&) {
124 throw NotImplementedException("SPIR-V Instruction");
125}
126
127Id EmitBindlessImageAtomicOr32(EmitContext&) {
128 throw NotImplementedException("SPIR-V Instruction");
129}
130
131Id EmitBindlessImageAtomicXor32(EmitContext&) {
132 throw NotImplementedException("SPIR-V Instruction");
133}
134
135Id EmitBindlessImageAtomicExchange32(EmitContext&) {
136 throw NotImplementedException("SPIR-V Instruction");
137}
138
139Id EmitBoundImageAtomicIAdd32(EmitContext&) {
140 throw NotImplementedException("SPIR-V Instruction");
141}
142
143Id EmitBoundImageAtomicSMin32(EmitContext&) {
144 throw NotImplementedException("SPIR-V Instruction");
145}
146
147Id EmitBoundImageAtomicUMin32(EmitContext&) {
148 throw NotImplementedException("SPIR-V Instruction");
149}
150
151Id EmitBoundImageAtomicSMax32(EmitContext&) {
152 throw NotImplementedException("SPIR-V Instruction");
153}
154
155Id EmitBoundImageAtomicUMax32(EmitContext&) {
156 throw NotImplementedException("SPIR-V Instruction");
157}
158
159Id EmitBoundImageAtomicInc32(EmitContext&) {
160 throw NotImplementedException("SPIR-V Instruction");
161}
162
163Id EmitBoundImageAtomicDec32(EmitContext&) {
164 throw NotImplementedException("SPIR-V Instruction");
165}
166
167Id EmitBoundImageAtomicAnd32(EmitContext&) {
168 throw NotImplementedException("SPIR-V Instruction");
169}
170
171Id EmitBoundImageAtomicOr32(EmitContext&) {
172 throw NotImplementedException("SPIR-V Instruction");
173}
174
175Id EmitBoundImageAtomicXor32(EmitContext&) {
176 throw NotImplementedException("SPIR-V Instruction");
177}
178
179Id EmitBoundImageAtomicExchange32(EmitContext&) {
180 throw NotImplementedException("SPIR-V Instruction");
181}
182
183} // namespace Shader::Backend::SPIRV
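
ImageAtomicU32 above shares one texel-pointer plus atomic sequence across every 32-bit image atomic by taking the Sirit emitter as a pointer to member function. A minimal standalone C++ illustration of that dispatch idiom (the Module type and operations here are stand-ins, not Sirit's API):

#include <iostream>

struct Module {
    int OpAtomicIAdd(int lhs, int rhs) { return lhs + rhs; }
    int OpAtomicAnd(int lhs, int rhs) { return lhs & rhs; }
};

// One helper parameterised on the operation, as ImageAtomicU32 is on &Sirit::Module::OpAtomic*.
int EmitAtomic(Module& module, int (Module::*atomic_func)(int, int), int lhs, int rhs) {
    return (module.*atomic_func)(lhs, rhs);
}

int main() {
    Module module;
    std::cout << EmitAtomic(module, &Module::OpAtomicIAdd, 6, 3) << '\n'; // prints 9
    std::cout << EmitAtomic(module, &Module::OpAtomicAnd, 6, 3) << '\n';  // prints 2
}
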
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
new file mode 100644
index 000000000..f99c02848
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -0,0 +1,579 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <sirit/sirit.h>
6
7#include "common/common_types.h"
8
9namespace Shader::IR {
10enum class Attribute : u64;
11enum class Patch : u64;
12class Inst;
13class Value;
14} // namespace Shader::IR
15
16namespace Shader::Backend::SPIRV {
17
18using Sirit::Id;
19
20class EmitContext;
21
22// Microinstruction emitters
23Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
24void EmitVoid(EmitContext& ctx);
25Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
26Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
27void EmitReference(EmitContext&);
28void EmitPhiMove(EmitContext&);
29void EmitJoin(EmitContext& ctx);
30void EmitDemoteToHelperInvocation(EmitContext& ctx);
31void EmitBarrier(EmitContext& ctx);
32void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
33void EmitDeviceMemoryBarrier(EmitContext& ctx);
34void EmitPrologue(EmitContext& ctx);
35void EmitEpilogue(EmitContext& ctx);
36void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
37void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
38void EmitGetRegister(EmitContext& ctx);
39void EmitSetRegister(EmitContext& ctx);
40void EmitGetPred(EmitContext& ctx);
41void EmitSetPred(EmitContext& ctx);
42void EmitSetGotoVariable(EmitContext& ctx);
43void EmitGetGotoVariable(EmitContext& ctx);
44void EmitSetIndirectBranchVariable(EmitContext& ctx);
45void EmitGetIndirectBranchVariable(EmitContext& ctx);
46Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
47Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
48Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
49Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
50Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
51Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
52Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
53Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
54void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
55Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
56void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
57Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
58void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
59void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
60void EmitSetSampleMask(EmitContext& ctx, Id value);
61void EmitSetFragDepth(EmitContext& ctx, Id value);
62void EmitGetZFlag(EmitContext& ctx);
63void EmitGetSFlag(EmitContext& ctx);
64void EmitGetCFlag(EmitContext& ctx);
65void EmitGetOFlag(EmitContext& ctx);
66void EmitSetZFlag(EmitContext& ctx);
67void EmitSetSFlag(EmitContext& ctx);
68void EmitSetCFlag(EmitContext& ctx);
69void EmitSetOFlag(EmitContext& ctx);
70Id EmitWorkgroupId(EmitContext& ctx);
71Id EmitLocalInvocationId(EmitContext& ctx);
72Id EmitInvocationId(EmitContext& ctx);
73Id EmitSampleId(EmitContext& ctx);
74Id EmitIsHelperInvocation(EmitContext& ctx);
75Id EmitYDirection(EmitContext& ctx);
76Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
77void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
78Id EmitUndefU1(EmitContext& ctx);
79Id EmitUndefU8(EmitContext& ctx);
80Id EmitUndefU16(EmitContext& ctx);
81Id EmitUndefU32(EmitContext& ctx);
82Id EmitUndefU64(EmitContext& ctx);
83void EmitLoadGlobalU8(EmitContext& ctx);
84void EmitLoadGlobalS8(EmitContext& ctx);
85void EmitLoadGlobalU16(EmitContext& ctx);
86void EmitLoadGlobalS16(EmitContext& ctx);
87Id EmitLoadGlobal32(EmitContext& ctx, Id address);
88Id EmitLoadGlobal64(EmitContext& ctx, Id address);
89Id EmitLoadGlobal128(EmitContext& ctx, Id address);
90void EmitWriteGlobalU8(EmitContext& ctx);
91void EmitWriteGlobalS8(EmitContext& ctx);
92void EmitWriteGlobalU16(EmitContext& ctx);
93void EmitWriteGlobalS16(EmitContext& ctx);
94void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
95void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
96void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
97Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
98Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
99Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
100Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
101Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
102Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
103Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
104void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
105 Id value);
106void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
107 Id value);
108void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
109 Id value);
110void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
111 Id value);
112void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
113 Id value);
114void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
115 Id value);
116void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
117 Id value);
118Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
119Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
120Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
121Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
122Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
123Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
124Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
125void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
126void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
127void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
128void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
129void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
130Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
131Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
132Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
133Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
134Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
135Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
136Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
137Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
138Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
139Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
140Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
141Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
142Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
143Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
144Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
145Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
146Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
147Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
148Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
149Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
150Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
151Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
152Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
153Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
154Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
155Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
156Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
157void EmitCompositeConstructF64x2(EmitContext& ctx);
158void EmitCompositeConstructF64x3(EmitContext& ctx);
159void EmitCompositeConstructF64x4(EmitContext& ctx);
160void EmitCompositeExtractF64x2(EmitContext& ctx);
161void EmitCompositeExtractF64x3(EmitContext& ctx);
162void EmitCompositeExtractF64x4(EmitContext& ctx);
163Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
164Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
165Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
166Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
167Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
168Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
169Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
170Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
171Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
172Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
173Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
174void EmitBitCastU16F16(EmitContext& ctx);
175Id EmitBitCastU32F32(EmitContext& ctx, Id value);
176void EmitBitCastU64F64(EmitContext& ctx);
177void EmitBitCastF16U16(EmitContext& ctx);
178Id EmitBitCastF32U32(EmitContext& ctx, Id value);
179void EmitBitCastF64U64(EmitContext& ctx);
180Id EmitPackUint2x32(EmitContext& ctx, Id value);
181Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
182Id EmitPackFloat2x16(EmitContext& ctx, Id value);
183Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
184Id EmitPackHalf2x16(EmitContext& ctx, Id value);
185Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
186Id EmitPackDouble2x32(EmitContext& ctx, Id value);
187Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
188void EmitGetZeroFromOp(EmitContext& ctx);
189void EmitGetSignFromOp(EmitContext& ctx);
190void EmitGetCarryFromOp(EmitContext& ctx);
191void EmitGetOverflowFromOp(EmitContext& ctx);
192void EmitGetSparseFromOp(EmitContext& ctx);
193void EmitGetInBoundsFromOp(EmitContext& ctx);
194Id EmitFPAbs16(EmitContext& ctx, Id value);
195Id EmitFPAbs32(EmitContext& ctx, Id value);
196Id EmitFPAbs64(EmitContext& ctx, Id value);
197Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
198Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
199Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
200Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
201Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
202Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
203Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
204Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
205Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
206Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
207Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
208Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
209Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
210Id EmitFPNeg16(EmitContext& ctx, Id value);
211Id EmitFPNeg32(EmitContext& ctx, Id value);
212Id EmitFPNeg64(EmitContext& ctx, Id value);
213Id EmitFPSin(EmitContext& ctx, Id value);
214Id EmitFPCos(EmitContext& ctx, Id value);
215Id EmitFPExp2(EmitContext& ctx, Id value);
216Id EmitFPLog2(EmitContext& ctx, Id value);
217Id EmitFPRecip32(EmitContext& ctx, Id value);
218Id EmitFPRecip64(EmitContext& ctx, Id value);
219Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
220Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
221Id EmitFPSqrt(EmitContext& ctx, Id value);
222Id EmitFPSaturate16(EmitContext& ctx, Id value);
223Id EmitFPSaturate32(EmitContext& ctx, Id value);
224Id EmitFPSaturate64(EmitContext& ctx, Id value);
225Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
226Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
227Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
228Id EmitFPRoundEven16(EmitContext& ctx, Id value);
229Id EmitFPRoundEven32(EmitContext& ctx, Id value);
230Id EmitFPRoundEven64(EmitContext& ctx, Id value);
231Id EmitFPFloor16(EmitContext& ctx, Id value);
232Id EmitFPFloor32(EmitContext& ctx, Id value);
233Id EmitFPFloor64(EmitContext& ctx, Id value);
234Id EmitFPCeil16(EmitContext& ctx, Id value);
235Id EmitFPCeil32(EmitContext& ctx, Id value);
236Id EmitFPCeil64(EmitContext& ctx, Id value);
237Id EmitFPTrunc16(EmitContext& ctx, Id value);
238Id EmitFPTrunc32(EmitContext& ctx, Id value);
239Id EmitFPTrunc64(EmitContext& ctx, Id value);
240Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
241Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
242Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
243Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
244Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
245Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
246Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
247Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
248Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
249Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
250Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
251Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
252Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
253Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
254Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
255Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
256Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
257Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
258Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
259Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
260Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
261Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
262Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
263Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
264Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
265Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
266Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
267Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
268Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
269Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
270Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
271Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
272Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
273Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
274Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
275Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
276Id EmitFPIsNan16(EmitContext& ctx, Id value);
277Id EmitFPIsNan32(EmitContext& ctx, Id value);
278Id EmitFPIsNan64(EmitContext& ctx, Id value);
279Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
280Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
281Id EmitISub32(EmitContext& ctx, Id a, Id b);
282Id EmitISub64(EmitContext& ctx, Id a, Id b);
283Id EmitIMul32(EmitContext& ctx, Id a, Id b);
284Id EmitINeg32(EmitContext& ctx, Id value);
285Id EmitINeg64(EmitContext& ctx, Id value);
286Id EmitIAbs32(EmitContext& ctx, Id value);
287Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
288Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
289Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
290Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
291Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
292Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
293Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
294Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
295Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
296Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
297Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
298Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
299Id EmitBitReverse32(EmitContext& ctx, Id value);
300Id EmitBitCount32(EmitContext& ctx, Id value);
301Id EmitBitwiseNot32(EmitContext& ctx, Id value);
302Id EmitFindSMsb32(EmitContext& ctx, Id value);
303Id EmitFindUMsb32(EmitContext& ctx, Id value);
304Id EmitSMin32(EmitContext& ctx, Id a, Id b);
305Id EmitUMin32(EmitContext& ctx, Id a, Id b);
306Id EmitSMax32(EmitContext& ctx, Id a, Id b);
307Id EmitUMax32(EmitContext& ctx, Id a, Id b);
308Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
309Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
310Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
311Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
312Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
313Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
314Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
315Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
316Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
317Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
318Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
319Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
320Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
321Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
322Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
323Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
324Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
325Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
326Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
327Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
328Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
329Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
330Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
331Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
332Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
333 Id value);
334Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
335 Id value);
336Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
337 Id value);
338Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
339 Id value);
340Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
341 Id value);
342Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
343 Id value);
344Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
345 Id value);
346Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
347 Id value);
348Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
349 Id value);
350Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
351 Id value);
352Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
353 Id value);
354Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
355 Id value);
356Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
357 Id value);
358Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
359 Id value);
360Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
361 Id value);
362Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
363 Id value);
364Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
365 Id value);
366Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
367 Id value);
368Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
369 Id value);
370Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
371 Id value);
372Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
373 Id value);
374Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
375 Id value);
376Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
377 Id value);
378Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
379 Id value);
380Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
381 Id value);
382Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
383 Id value);
384Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
385 Id value);
386Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
387Id EmitGlobalAtomicSMin32(EmitContext& ctx);
388Id EmitGlobalAtomicUMin32(EmitContext& ctx);
389Id EmitGlobalAtomicSMax32(EmitContext& ctx);
390Id EmitGlobalAtomicUMax32(EmitContext& ctx);
391Id EmitGlobalAtomicInc32(EmitContext& ctx);
392Id EmitGlobalAtomicDec32(EmitContext& ctx);
393Id EmitGlobalAtomicAnd32(EmitContext& ctx);
394Id EmitGlobalAtomicOr32(EmitContext& ctx);
395Id EmitGlobalAtomicXor32(EmitContext& ctx);
396Id EmitGlobalAtomicExchange32(EmitContext& ctx);
397Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
398Id EmitGlobalAtomicSMin64(EmitContext& ctx);
399Id EmitGlobalAtomicUMin64(EmitContext& ctx);
400Id EmitGlobalAtomicSMax64(EmitContext& ctx);
401Id EmitGlobalAtomicUMax64(EmitContext& ctx);
402Id EmitGlobalAtomicInc64(EmitContext& ctx);
403Id EmitGlobalAtomicDec64(EmitContext& ctx);
404Id EmitGlobalAtomicAnd64(EmitContext& ctx);
405Id EmitGlobalAtomicOr64(EmitContext& ctx);
406Id EmitGlobalAtomicXor64(EmitContext& ctx);
407Id EmitGlobalAtomicExchange64(EmitContext& ctx);
408Id EmitGlobalAtomicAddF32(EmitContext& ctx);
409Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
410Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
411Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
412Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
413Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
414Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
415Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
416Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
417Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
418Id EmitLogicalNot(EmitContext& ctx, Id value);
419Id EmitConvertS16F16(EmitContext& ctx, Id value);
420Id EmitConvertS16F32(EmitContext& ctx, Id value);
421Id EmitConvertS16F64(EmitContext& ctx, Id value);
422Id EmitConvertS32F16(EmitContext& ctx, Id value);
423Id EmitConvertS32F32(EmitContext& ctx, Id value);
424Id EmitConvertS32F64(EmitContext& ctx, Id value);
425Id EmitConvertS64F16(EmitContext& ctx, Id value);
426Id EmitConvertS64F32(EmitContext& ctx, Id value);
427Id EmitConvertS64F64(EmitContext& ctx, Id value);
428Id EmitConvertU16F16(EmitContext& ctx, Id value);
429Id EmitConvertU16F32(EmitContext& ctx, Id value);
430Id EmitConvertU16F64(EmitContext& ctx, Id value);
431Id EmitConvertU32F16(EmitContext& ctx, Id value);
432Id EmitConvertU32F32(EmitContext& ctx, Id value);
433Id EmitConvertU32F64(EmitContext& ctx, Id value);
434Id EmitConvertU64F16(EmitContext& ctx, Id value);
435Id EmitConvertU64F32(EmitContext& ctx, Id value);
436Id EmitConvertU64F64(EmitContext& ctx, Id value);
437Id EmitConvertU64U32(EmitContext& ctx, Id value);
438Id EmitConvertU32U64(EmitContext& ctx, Id value);
439Id EmitConvertF16F32(EmitContext& ctx, Id value);
440Id EmitConvertF32F16(EmitContext& ctx, Id value);
441Id EmitConvertF32F64(EmitContext& ctx, Id value);
442Id EmitConvertF64F32(EmitContext& ctx, Id value);
443Id EmitConvertF16S8(EmitContext& ctx, Id value);
444Id EmitConvertF16S16(EmitContext& ctx, Id value);
445Id EmitConvertF16S32(EmitContext& ctx, Id value);
446Id EmitConvertF16S64(EmitContext& ctx, Id value);
447Id EmitConvertF16U8(EmitContext& ctx, Id value);
448Id EmitConvertF16U16(EmitContext& ctx, Id value);
449Id EmitConvertF16U32(EmitContext& ctx, Id value);
450Id EmitConvertF16U64(EmitContext& ctx, Id value);
451Id EmitConvertF32S8(EmitContext& ctx, Id value);
452Id EmitConvertF32S16(EmitContext& ctx, Id value);
453Id EmitConvertF32S32(EmitContext& ctx, Id value);
454Id EmitConvertF32S64(EmitContext& ctx, Id value);
455Id EmitConvertF32U8(EmitContext& ctx, Id value);
456Id EmitConvertF32U16(EmitContext& ctx, Id value);
457Id EmitConvertF32U32(EmitContext& ctx, Id value);
458Id EmitConvertF32U64(EmitContext& ctx, Id value);
459Id EmitConvertF64S8(EmitContext& ctx, Id value);
460Id EmitConvertF64S16(EmitContext& ctx, Id value);
461Id EmitConvertF64S32(EmitContext& ctx, Id value);
462Id EmitConvertF64S64(EmitContext& ctx, Id value);
463Id EmitConvertF64U8(EmitContext& ctx, Id value);
464Id EmitConvertF64U16(EmitContext& ctx, Id value);
465Id EmitConvertF64U32(EmitContext& ctx, Id value);
466Id EmitConvertF64U64(EmitContext& ctx, Id value);
467Id EmitBindlessImageSampleImplicitLod(EmitContext&);
468Id EmitBindlessImageSampleExplicitLod(EmitContext&);
469Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
470Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
471Id EmitBindlessImageGather(EmitContext&);
472Id EmitBindlessImageGatherDref(EmitContext&);
473Id EmitBindlessImageFetch(EmitContext&);
474Id EmitBindlessImageQueryDimensions(EmitContext&);
475Id EmitBindlessImageQueryLod(EmitContext&);
476Id EmitBindlessImageGradient(EmitContext&);
477Id EmitBindlessImageRead(EmitContext&);
478Id EmitBindlessImageWrite(EmitContext&);
479Id EmitBoundImageSampleImplicitLod(EmitContext&);
480Id EmitBoundImageSampleExplicitLod(EmitContext&);
481Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
482Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
483Id EmitBoundImageGather(EmitContext&);
484Id EmitBoundImageGatherDref(EmitContext&);
485Id EmitBoundImageFetch(EmitContext&);
486Id EmitBoundImageQueryDimensions(EmitContext&);
487Id EmitBoundImageQueryLod(EmitContext&);
488Id EmitBoundImageGradient(EmitContext&);
489Id EmitBoundImageRead(EmitContext&);
490Id EmitBoundImageWrite(EmitContext&);
491Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
492 Id bias_lc, const IR::Value& offset);
493Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
494 Id lod, const IR::Value& offset);
495Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
496 Id coords, Id dref, Id bias_lc, const IR::Value& offset);
497Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
498 Id coords, Id dref, Id lod, const IR::Value& offset);
499Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
500 const IR::Value& offset, const IR::Value& offset2);
501Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
502 const IR::Value& offset, const IR::Value& offset2, Id dref);
503Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
504 Id lod, Id ms);
505Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
506Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
507Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
508 Id derivates, Id offset, Id lod_clamp);
509Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
510void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
511Id EmitBindlessImageAtomicIAdd32(EmitContext&);
512Id EmitBindlessImageAtomicSMin32(EmitContext&);
513Id EmitBindlessImageAtomicUMin32(EmitContext&);
514Id EmitBindlessImageAtomicSMax32(EmitContext&);
515Id EmitBindlessImageAtomicUMax32(EmitContext&);
516Id EmitBindlessImageAtomicInc32(EmitContext&);
517Id EmitBindlessImageAtomicDec32(EmitContext&);
518Id EmitBindlessImageAtomicAnd32(EmitContext&);
519Id EmitBindlessImageAtomicOr32(EmitContext&);
520Id EmitBindlessImageAtomicXor32(EmitContext&);
521Id EmitBindlessImageAtomicExchange32(EmitContext&);
522Id EmitBoundImageAtomicIAdd32(EmitContext&);
523Id EmitBoundImageAtomicSMin32(EmitContext&);
524Id EmitBoundImageAtomicUMin32(EmitContext&);
525Id EmitBoundImageAtomicSMax32(EmitContext&);
526Id EmitBoundImageAtomicUMax32(EmitContext&);
527Id EmitBoundImageAtomicInc32(EmitContext&);
528Id EmitBoundImageAtomicDec32(EmitContext&);
529Id EmitBoundImageAtomicAnd32(EmitContext&);
530Id EmitBoundImageAtomicOr32(EmitContext&);
531Id EmitBoundImageAtomicXor32(EmitContext&);
532Id EmitBoundImageAtomicExchange32(EmitContext&);
533Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
534 Id value);
535Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
536 Id value);
537Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
538 Id value);
539Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
540 Id value);
541Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
542 Id value);
543Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
544 Id value);
545Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
546 Id value);
547Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
548 Id value);
549Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
550 Id value);
551Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
552 Id value);
553Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
554 Id value);
555Id EmitLaneId(EmitContext& ctx);
556Id EmitVoteAll(EmitContext& ctx, Id pred);
557Id EmitVoteAny(EmitContext& ctx, Id pred);
558Id EmitVoteEqual(EmitContext& ctx, Id pred);
559Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
560Id EmitSubgroupEqMask(EmitContext& ctx);
561Id EmitSubgroupLtMask(EmitContext& ctx);
562Id EmitSubgroupLeMask(EmitContext& ctx);
563Id EmitSubgroupGtMask(EmitContext& ctx);
564Id EmitSubgroupGeMask(EmitContext& ctx);
565Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
566 Id segmentation_mask);
567Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
568 Id segmentation_mask);
569Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
570 Id segmentation_mask);
571Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
572 Id segmentation_mask);
573Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
574Id EmitDPdxFine(EmitContext& ctx, Id op_a);
575Id EmitDPdyFine(EmitContext& ctx, Id op_a);
576Id EmitDPdxCoarse(EmitContext& ctx, Id op_a);
577Id EmitDPdyCoarse(EmitContext& ctx, Id op_a);
578
579} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
new file mode 100644
index 000000000..3501d7495
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -0,0 +1,270 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
11 IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
12 if (!zero) {
13 return;
14 }
15 zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value));
16 zero->Invalidate();
17}
18
19void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
20 IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
21 if (!sign) {
22 return;
23 }
24 sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value));
25 sign->Invalidate();
26}
27} // Anonymous namespace
28
29Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
30 Id result{};
31 if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
32 const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])};
33 const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)};
34 result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U);
35
36 const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)};
37 carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value));
38 carry->Invalidate();
39 } else {
40 result = ctx.OpIAdd(ctx.U32[1], a, b);
41 }
42 SetZeroFlag(ctx, inst, result);
43 SetSignFlag(ctx, inst, result);
44 if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
45 // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
46 constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
47 const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)};
48 const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)};
49
50 const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)};
51 const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)};
52 const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)};
53 overflow->SetDefinition(carry_flag);
54 overflow->Invalidate();
55 }
56 return result;
57}
58
59Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
60 return ctx.OpIAdd(ctx.U64, a, b);
61}
62
63Id EmitISub32(EmitContext& ctx, Id a, Id b) {
64 return ctx.OpISub(ctx.U32[1], a, b);
65}
66
67Id EmitISub64(EmitContext& ctx, Id a, Id b) {
68 return ctx.OpISub(ctx.U64, a, b);
69}
70
71Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
72 return ctx.OpIMul(ctx.U32[1], a, b);
73}
74
75Id EmitINeg32(EmitContext& ctx, Id value) {
76 return ctx.OpSNegate(ctx.U32[1], value);
77}
78
79Id EmitINeg64(EmitContext& ctx, Id value) {
80 return ctx.OpSNegate(ctx.U64, value);
81}
82
83Id EmitIAbs32(EmitContext& ctx, Id value) {
84 return ctx.OpSAbs(ctx.U32[1], value);
85}
86
87Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
88 return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
89}
90
91Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) {
92 return ctx.OpShiftLeftLogical(ctx.U64, base, shift);
93}
94
95Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) {
96 return ctx.OpShiftRightLogical(ctx.U32[1], base, shift);
97}
98
99Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) {
100 return ctx.OpShiftRightLogical(ctx.U64, base, shift);
101}
102
103Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) {
104 return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift);
105}
106
107Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) {
108 return ctx.OpShiftRightArithmetic(ctx.U64, base, shift);
109}
110
111Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
112 const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)};
113 SetZeroFlag(ctx, inst, result);
114 SetSignFlag(ctx, inst, result);
115 return result;
116}
117
118Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
119 const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
120 SetZeroFlag(ctx, inst, result);
121 SetSignFlag(ctx, inst, result);
122 return result;
123}
124
125Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
126 const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
127 SetZeroFlag(ctx, inst, result);
128 SetSignFlag(ctx, inst, result);
129 return result;
130}
131
132Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) {
133 return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count);
134}
135
136Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
137 const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)};
138 SetZeroFlag(ctx, inst, result);
139 SetSignFlag(ctx, inst, result);
140 return result;
141}
142
143Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
144 const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)};
145 SetZeroFlag(ctx, inst, result);
146 SetSignFlag(ctx, inst, result);
147 return result;
148}
149
150Id EmitBitReverse32(EmitContext& ctx, Id value) {
151 return ctx.OpBitReverse(ctx.U32[1], value);
152}
153
154Id EmitBitCount32(EmitContext& ctx, Id value) {
155 return ctx.OpBitCount(ctx.U32[1], value);
156}
157
158Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
159 return ctx.OpNot(ctx.U32[1], value);
160}
161
162Id EmitFindSMsb32(EmitContext& ctx, Id value) {
163 return ctx.OpFindSMsb(ctx.U32[1], value);
164}
165
166Id EmitFindUMsb32(EmitContext& ctx, Id value) {
167 return ctx.OpFindUMsb(ctx.U32[1], value);
168}
169
170Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
171 const bool is_broken{ctx.profile.has_broken_signed_operations};
172 if (is_broken) {
173 a = ctx.OpBitcast(ctx.S32[1], a);
174 b = ctx.OpBitcast(ctx.S32[1], b);
175 }
176 const Id result{ctx.OpSMin(ctx.U32[1], a, b)};
177 return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
178}
179
180Id EmitUMin32(EmitContext& ctx, Id a, Id b) {
181 return ctx.OpUMin(ctx.U32[1], a, b);
182}
183
184Id EmitSMax32(EmitContext& ctx, Id a, Id b) {
185 const bool is_broken{ctx.profile.has_broken_signed_operations};
186 if (is_broken) {
187 a = ctx.OpBitcast(ctx.S32[1], a);
188 b = ctx.OpBitcast(ctx.S32[1], b);
189 }
190 const Id result{ctx.OpSMax(ctx.U32[1], a, b)};
191 return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
192}
193
194Id EmitUMax32(EmitContext& ctx, Id a, Id b) {
195 return ctx.OpUMax(ctx.U32[1], a, b);
196}
197
198Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
199 Id result{};
200 if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) {
201 value = ctx.OpBitcast(ctx.S32[1], value);
202 min = ctx.OpBitcast(ctx.S32[1], min);
203 max = ctx.OpBitcast(ctx.S32[1], max);
204 if (ctx.profile.has_broken_spirv_clamp) {
205 result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min);
206 } else {
207 result = ctx.OpSClamp(ctx.S32[1], value, min, max);
208 }
209 result = ctx.OpBitcast(ctx.U32[1], result);
210 } else {
211 result = ctx.OpSClamp(ctx.U32[1], value, min, max);
212 }
213 SetZeroFlag(ctx, inst, result);
214 SetSignFlag(ctx, inst, result);
215 return result;
216}
217
218Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
219 Id result{};
220 if (ctx.profile.has_broken_spirv_clamp) {
221 result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min);
222 } else {
223 result = ctx.OpUClamp(ctx.U32[1], value, min, max);
224 }
225 SetZeroFlag(ctx, inst, result);
226 SetSignFlag(ctx, inst, result);
227 return result;
228}
229
230Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
231 return ctx.OpSLessThan(ctx.U1, lhs, rhs);
232}
233
234Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
235 return ctx.OpULessThan(ctx.U1, lhs, rhs);
236}
237
238Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) {
239 return ctx.OpIEqual(ctx.U1, lhs, rhs);
240}
241
242Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
243 return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs);
244}
245
246Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
247 return ctx.OpULessThanEqual(ctx.U1, lhs, rhs);
248}
249
250Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
251 return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);
252}
253
254Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
255 return ctx.OpUGreaterThan(ctx.U1, lhs, rhs);
256}
257
258Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) {
259 return ctx.OpINotEqual(ctx.U1, lhs, rhs);
260}
261
262Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
263 return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs);
264}
265
266Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
267 return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
268}
269
270} // namespace Shader::Backend::SPIRV
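The condition-code pseudo-ops above are resolved from the already-emitted result rather than from dedicated hardware flags: zero and sign are plain comparisons against zero, and the carry out of IAdd32 is read from the second member of the OpIAddCarry result struct. A minimal scalar sketch of the same flag arithmetic, using hypothetical helper names that are not part of the backend:

#include <cstdint>

struct Flags {
    bool zero;
    bool sign;
    bool carry;
};

// Scalar model of the flag computation performed on the SPIR-V result.
Flags AddWithFlags(uint32_t a, uint32_t b, uint32_t& result) {
    result = a + b;                                // wraps modulo 2^32, like OpIAdd
    return Flags{
        .zero = result == 0,                       // GetZeroFromOp
        .sign = static_cast<int32_t>(result) < 0,  // GetSignFromOp
        .carry = result < a,                       // carry out, i.e. OpIAddCarry's second member
    };
}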
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
new file mode 100644
index 000000000..b9a9500fc
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) {
11 return ctx.OpLogicalOr(ctx.U1, a, b);
12}
13
14Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) {
15 return ctx.OpLogicalAnd(ctx.U1, a, b);
16}
17
18Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) {
19 return ctx.OpLogicalNotEqual(ctx.U1, a, b);
20}
21
22Id EmitLogicalNot(EmitContext& ctx, Id value) {
23 return ctx.OpLogicalNot(ctx.U1, value);
24}
25
26} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
new file mode 100644
index 000000000..679ee2684
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -0,0 +1,275 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bit>
6
7#include "shader_recompiler/backend/spirv/emit_spirv.h"
8#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
9
10namespace Shader::Backend::SPIRV {
11namespace {
12Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size,
13 u32 index_offset = 0) {
14 if (offset.IsImmediate()) {
15 const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset};
16 return ctx.Const(imm_offset);
17 }
18 const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
19 Id index{ctx.Def(offset)};
20 if (shift != 0) {
21 const Id shift_id{ctx.Const(shift)};
22 index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
23 }
24 if (index_offset != 0) {
25 index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
26 }
27 return index;
28}
29
30Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
31 const StorageTypeDefinition& type_def, size_t element_size,
32 Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
33 if (!binding.IsImmediate()) {
34 throw NotImplementedException("Dynamic storage buffer indexing");
35 }
36 const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
37 const Id index{StorageIndex(ctx, offset, element_size, index_offset)};
38 return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
39}
40
41Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type,
42 const StorageTypeDefinition& type_def, size_t element_size,
43 Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
44 const Id pointer{
45 StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
46 return ctx.OpLoad(result_type, pointer);
47}
48
49Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
50 u32 index_offset = 0) {
51 return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32),
52 &StorageDefinitions::U32, index_offset);
53}
54
55void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
56 const StorageTypeDefinition& type_def, size_t element_size,
57 Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
58 const Id pointer{
59 StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
60 ctx.OpStore(pointer, value);
61}
62
63void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
64 u32 index_offset = 0) {
65 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
66 &StorageDefinitions::U32, index_offset);
67}
68} // Anonymous namespace
69
70void EmitLoadGlobalU8(EmitContext&) {
71 throw NotImplementedException("SPIR-V Instruction");
72}
73
74void EmitLoadGlobalS8(EmitContext&) {
75 throw NotImplementedException("SPIR-V Instruction");
76}
77
78void EmitLoadGlobalU16(EmitContext&) {
79 throw NotImplementedException("SPIR-V Instruction");
80}
81
82void EmitLoadGlobalS16(EmitContext&) {
83 throw NotImplementedException("SPIR-V Instruction");
84}
85
86Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
87 if (ctx.profile.support_int64) {
88 return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
89 }
90 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
91 return ctx.Const(0u);
92}
93
94Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
95 if (ctx.profile.support_int64) {
96 return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
97 }
98 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
99 return ctx.Const(0u, 0u);
100}
101
102Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
103 if (ctx.profile.support_int64) {
104 return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
105 }
106 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
107 return ctx.Const(0u, 0u, 0u, 0u);
108}
109
110void EmitWriteGlobalU8(EmitContext&) {
111 throw NotImplementedException("SPIR-V Instruction");
112}
113
114void EmitWriteGlobalS8(EmitContext&) {
115 throw NotImplementedException("SPIR-V Instruction");
116}
117
118void EmitWriteGlobalU16(EmitContext&) {
119 throw NotImplementedException("SPIR-V Instruction");
120}
121
122void EmitWriteGlobalS16(EmitContext&) {
123 throw NotImplementedException("SPIR-V Instruction");
124}
125
126void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
127 if (ctx.profile.support_int64) {
128 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
129 return;
130 }
131 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
132}
133
134void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
135 if (ctx.profile.support_int64) {
136 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
137 return;
138 }
139 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
140}
141
142void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
143 if (ctx.profile.support_int64) {
144 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
145 return;
146 }
147 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
148}
149
150Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
151 if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
152 return ctx.OpUConvert(ctx.U32[1],
153 LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8,
154 sizeof(u8), &StorageDefinitions::U8));
155 } else {
156 return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
157 ctx.BitOffset8(offset), ctx.Const(8u));
158 }
159}
160
161Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
162 if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
163 return ctx.OpSConvert(ctx.U32[1],
164 LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8,
165 sizeof(s8), &StorageDefinitions::S8));
166 } else {
167 return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
168 ctx.BitOffset8(offset), ctx.Const(8u));
169 }
170}
171
172Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
173 if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
174 return ctx.OpUConvert(ctx.U32[1],
175 LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16,
176 sizeof(u16), &StorageDefinitions::U16));
177 } else {
178 return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
179 ctx.BitOffset16(offset), ctx.Const(16u));
180 }
181}
182
183Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
184 if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
185 return ctx.OpSConvert(ctx.U32[1],
186 LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16,
187 sizeof(s16), &StorageDefinitions::S16));
188 } else {
189 return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
190 ctx.BitOffset16(offset), ctx.Const(16u));
191 }
192}
193
194Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
195 return LoadStorage32(ctx, binding, offset);
196}
197
198Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
199 if (ctx.profile.support_descriptor_aliasing) {
200 return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2,
201 sizeof(u32[2]), &StorageDefinitions::U32x2);
202 } else {
203 return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0),
204 LoadStorage32(ctx, binding, offset, 1));
205 }
206}
207
208Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
209 if (ctx.profile.support_descriptor_aliasing) {
210 return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4,
211 sizeof(u32[4]), &StorageDefinitions::U32x4);
212 } else {
213 return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0),
214 LoadStorage32(ctx, binding, offset, 1),
215 LoadStorage32(ctx, binding, offset, 2),
216 LoadStorage32(ctx, binding, offset, 3));
217 }
218}
219
220void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
221 Id value) {
222 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
223 sizeof(u8), &StorageDefinitions::U8);
224}
225
226void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
227 Id value) {
228 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
229 sizeof(s8), &StorageDefinitions::S8);
230}
231
232void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
233 Id value) {
234 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
235 sizeof(u16), &StorageDefinitions::U16);
236}
237
238void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
239 Id value) {
240 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
241 sizeof(s16), &StorageDefinitions::S16);
242}
243
244void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
245 Id value) {
246 WriteStorage32(ctx, binding, offset, value);
247}
248
249void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
250 Id value) {
251 if (ctx.profile.support_descriptor_aliasing) {
252 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]),
253 &StorageDefinitions::U32x2);
254 } else {
255 for (u32 index = 0; index < 2; ++index) {
256 const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
257 WriteStorage32(ctx, binding, offset, element, index);
258 }
259 }
260}
261
262void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
263 Id value) {
264 if (ctx.profile.support_descriptor_aliasing) {
265 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]),
266 &StorageDefinitions::U32x4);
267 } else {
268 for (u32 index = 0; index < 4; ++index) {
269 const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
270 WriteStorage32(ctx, binding, offset, element, index);
271 }
272 }
273}
274
275} // namespace Shader::Backend::SPIRV
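Storage accesses treat each SSBO as an array of fixed-size elements, so StorageIndex turns a byte offset into an element index with a right shift by log2(element_size), and the optional index_offset is how wide loads and stores are split into consecutive 32-bit words when descriptor aliasing is unavailable. A self-contained sketch of that index arithmetic, not the backend code itself:

#include <bit>
#include <cstddef>
#include <cstdint>

// Byte offset -> element index within an SSBO viewed as an array of
// element_size-byte values; element_size must be a power of two.
uint32_t StorageElementIndex(uint32_t byte_offset, size_t element_size,
                             uint32_t index_offset = 0) {
    const uint32_t shift = static_cast<uint32_t>(std::countr_zero(element_size));
    return (byte_offset >> shift) + index_offset;
}

// Example: without descriptor aliasing, a 64-bit store becomes two u32 writes at
// StorageElementIndex(offset, 4, 0) and StorageElementIndex(offset, 4, 1).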
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
new file mode 100644
index 000000000..c5b4f4720
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
11 return ctx.OpSelect(ctx.U1, cond, true_value, false_value);
12}
13
14Id EmitSelectU8(EmitContext&, Id, Id, Id) {
15 throw NotImplementedException("SPIR-V Instruction");
16}
17
18Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
19 return ctx.OpSelect(ctx.U16, cond, true_value, false_value);
20}
21
22Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
23 return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
24}
25
26Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
27 return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
28}
29
30Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
31 return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value);
32}
33
34Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
35 return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
36}
37
38Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
39 return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value);
40}
41
42} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..9a79fc7a2
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,174 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
11 const Id shift_id{ctx.Const(shift)};
12 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
13 return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
14}
15
16Id Word(EmitContext& ctx, Id offset) {
17 const Id shift_id{ctx.Const(2U)};
18 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
19 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
20 return ctx.OpLoad(ctx.U32[1], pointer);
21}
22
23std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
24 const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))};
25 const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))};
26 const Id count_id{ctx.Const(count)};
27 return {bit, count_id};
28}
29} // Anonymous namespace
30
31Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
32 if (ctx.profile.support_explicit_workgroup_layout) {
33 const Id pointer{
34 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
35 return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
36 } else {
37 const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
38 return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
39 }
40}
41
42Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
43 if (ctx.profile.support_explicit_workgroup_layout) {
44 const Id pointer{
45 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
46 return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
47 } else {
48 const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
49 return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
50 }
51}
52
53Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
54 if (ctx.profile.support_explicit_workgroup_layout) {
55 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
56 return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
57 } else {
58 const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
59 return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
60 }
61}
62
63Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
64 if (ctx.profile.support_explicit_workgroup_layout) {
65 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
66 return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
67 } else {
68 const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
69 return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
70 }
71}
72
73Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
74 if (ctx.profile.support_explicit_workgroup_layout) {
75 const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
76 return ctx.OpLoad(ctx.U32[1], pointer);
77 } else {
78 return Word(ctx, offset);
79 }
80}
81
82Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
83 if (ctx.profile.support_explicit_workgroup_layout) {
84 const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
85 return ctx.OpLoad(ctx.U32[2], pointer);
86 } else {
87 const Id shift_id{ctx.Const(2U)};
88 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
89 const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
90 const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
91 const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
92 return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
93 ctx.OpLoad(ctx.U32[1], rhs_pointer));
94 }
95}
96
97Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
98 if (ctx.profile.support_explicit_workgroup_layout) {
99 const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
100 return ctx.OpLoad(ctx.U32[4], pointer);
101 }
102 const Id shift_id{ctx.Const(2U)};
103 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
104 std::array<Id, 4> values{};
105 for (u32 i = 0; i < 4; ++i) {
106 const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
107 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
108 values[i] = ctx.OpLoad(ctx.U32[1], pointer);
109 }
110 return ctx.OpCompositeConstruct(ctx.U32[4], values);
111}
112
113void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
114 if (ctx.profile.support_explicit_workgroup_layout) {
115 const Id pointer{
116 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
117 ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
118 } else {
119 ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
120 }
121}
122
123void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
124 if (ctx.profile.support_explicit_workgroup_layout) {
125 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
126 ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
127 } else {
128 ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
129 }
130}
131
132void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
133 Id pointer{};
134 if (ctx.profile.support_explicit_workgroup_layout) {
135 pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
136 } else {
137 const Id shift{ctx.Const(2U)};
138 const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
139 pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
140 }
141 ctx.OpStore(pointer, value);
142}
143
144void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
145 if (ctx.profile.support_explicit_workgroup_layout) {
146 const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
147 ctx.OpStore(pointer, value);
148 return;
149 }
150 const Id shift{ctx.Const(2U)};
151 const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
152 const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
153 const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
154 const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
155 ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
156 ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
157}
158
159void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
160 if (ctx.profile.support_explicit_workgroup_layout) {
161 const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
162 ctx.OpStore(pointer, value);
163 return;
164 }
165 const Id shift{ctx.Const(2U)};
166 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
167 for (u32 i = 0; i < 4; ++i) {
168 const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
169 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
170 ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
171 }
172}
173
174} // namespace Shader::Backend::SPIRV
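When the explicit workgroup layout extension is unavailable, shared memory is modelled as an array of 32-bit words and byte or halfword accesses fall back to bitfield extraction: the word index is the byte offset shifted right by two, and the bit position is the byte offset times eight, masked so it stays inside the word (mask 24 for bytes, 16 for halfwords). A standalone sketch of that fallback arithmetic, assuming the same word-array model:

#include <cstdint>

// Fallback model: shared memory as an array of 32-bit words.
uint32_t LoadSharedU8Fallback(const uint32_t* shared_words, uint32_t byte_offset) {
    const uint32_t word = shared_words[byte_offset >> 2]; // Word(ctx, offset)
    const uint32_t bit = (byte_offset * 8) & 24;          // ExtractArgs mask for 8-bit loads
    return (word >> bit) & 0xffu;                         // OpBitFieldUExtract(word, bit, 8)
}

uint32_t LoadSharedU16Fallback(const uint32_t* shared_words, uint32_t byte_offset) {
    const uint32_t word = shared_words[byte_offset >> 2];
    const uint32_t bit = (byte_offset * 8) & 16;          // mask 16 for 16-bit loads
    return (word >> bit) & 0xffffu;
}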
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
new file mode 100644
index 000000000..9e7eb3cb1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -0,0 +1,150 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10void ConvertDepthMode(EmitContext& ctx) {
11 const Id type{ctx.F32[1]};
12 const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
13 const Id z{ctx.OpCompositeExtract(type, position, 2u)};
14 const Id w{ctx.OpCompositeExtract(type, position, 3u)};
15 const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
16 const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
17 ctx.OpStore(ctx.output_position, vector);
18}
19
20void SetFixedPipelinePointSize(EmitContext& ctx) {
21 if (ctx.runtime_info.fixed_state_point_size) {
22 const float point_size{*ctx.runtime_info.fixed_state_point_size};
23 ctx.OpStore(ctx.output_point_size, ctx.Const(point_size));
24 }
25}
26
27Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one,
28 Id default_vector) {
29 switch (num_components) {
30 case 1:
31 return element == 3 ? one : zero;
32 case 2:
33 return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero);
34 case 3:
35 return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero);
36 case 4:
37 return default_vector;
38 }
39 throw InvalidArgument("Bad element");
40}
41
42Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) {
43 switch (comparison) {
44 case CompareFunction::Never:
45 return ctx.false_value;
46 case CompareFunction::Less:
47 return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2);
48 case CompareFunction::Equal:
49 return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2);
50 case CompareFunction::LessThanEqual:
51 return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2);
52 case CompareFunction::Greater:
53 return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2);
54 case CompareFunction::NotEqual:
55 return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2);
56 case CompareFunction::GreaterThanEqual:
57 return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2);
58 case CompareFunction::Always:
59 return ctx.true_value;
60 }
61 throw InvalidArgument("Comparison function {}", comparison);
62}
63
64void AlphaTest(EmitContext& ctx) {
65 if (!ctx.runtime_info.alpha_test_func) {
66 return;
67 }
68 const auto comparison{*ctx.runtime_info.alpha_test_func};
69 if (comparison == CompareFunction::Always) {
70 return;
71 }
72 if (!Sirit::ValidId(ctx.frag_color[0])) {
73 return;
74 }
75
76 const Id type{ctx.F32[1]};
77 const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])};
78 const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)};
79
80 const Id true_label{ctx.OpLabel()};
81 const Id discard_label{ctx.OpLabel()};
82 const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)};
83 const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)};
84
85 ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone);
86 ctx.OpBranchConditional(condition, true_label, discard_label);
87 ctx.AddLabel(discard_label);
88 ctx.OpKill();
89 ctx.AddLabel(true_label);
90}
91} // Anonymous namespace
92
93void EmitPrologue(EmitContext& ctx) {
94 if (ctx.stage == Stage::VertexB) {
95 const Id zero{ctx.Const(0.0f)};
96 const Id one{ctx.Const(1.0f)};
97 const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)};
98 ctx.OpStore(ctx.output_position, default_vector);
99 for (const auto& info : ctx.output_generics) {
100 if (info[0].num_components == 0) {
101 continue;
102 }
103 u32 element{0};
104 while (element < 4) {
105 const auto& element_info{info[element]};
106 const u32 num{element_info.num_components};
107 const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)};
108 ctx.OpStore(element_info.id, value);
109 element += num;
110 }
111 }
112 }
113 if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
114 SetFixedPipelinePointSize(ctx);
115 }
116}
117
118void EmitEpilogue(EmitContext& ctx) {
119 if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) {
120 ConvertDepthMode(ctx);
121 }
122 if (ctx.stage == Stage::Fragment) {
123 AlphaTest(ctx);
124 }
125}
126
127void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
128 if (ctx.runtime_info.convert_depth_mode) {
129 ConvertDepthMode(ctx);
130 }
131 if (stream.IsImmediate()) {
132 ctx.OpEmitStreamVertex(ctx.Def(stream));
133 } else {
134 LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
135 ctx.OpEmitStreamVertex(ctx.u32_zero_value);
136 }
137 // Restore fixed pipeline point size after emitting the vertex
138 SetFixedPipelinePointSize(ctx);
139}
140
141void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
142 if (stream.IsImmediate()) {
143 ctx.OpEndStreamPrimitive(ctx.Def(stream));
144 } else {
145 LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
146 ctx.OpEndStreamPrimitive(ctx.u32_zero_value);
147 }
148}
149
150} // namespace Shader::Backend::SPIRV
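ConvertDepthMode rewrites the position's z component as (z + w) * 0.5 just before the vertex leaves the stage, mapping a clip-space depth produced for a [-w, w] range onto the [0, w] range expected by the host. A plain scalar sketch of that remapping, illustrative only:

// Same (z + w) * 0.5 that ConvertDepthMode applies to the stored position's z.
float ConvertDepth(float z, float w) {
    return (z + w) * 0.5f;
}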
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
new file mode 100644
index 000000000..c9f469e90
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10Id EmitUndefU1(EmitContext& ctx) {
11 return ctx.OpUndef(ctx.U1);
12}
13
14Id EmitUndefU8(EmitContext&) {
15 throw NotImplementedException("SPIR-V Instruction");
16}
17
18Id EmitUndefU16(EmitContext&) {
19 throw NotImplementedException("SPIR-V Instruction");
20}
21
22Id EmitUndefU32(EmitContext& ctx) {
23 return ctx.OpUndef(ctx.U32[1]);
24}
25
26Id EmitUndefU64(EmitContext&) {
27 throw NotImplementedException("SPIR-V Instruction");
28}
29
30} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
new file mode 100644
index 000000000..78b1e1ba7
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -0,0 +1,203 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10Id WarpExtract(EmitContext& ctx, Id value) {
11 const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
12 return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
13}
14
15Id LoadMask(EmitContext& ctx, Id mask) {
16 const Id value{ctx.OpLoad(ctx.U32[4], mask)};
17 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
18 return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
19 }
20 return WarpExtract(ctx, value);
21}
22
23void SetInBoundsFlag(IR::Inst* inst, Id result) {
24 IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
25 if (!in_bounds) {
26 return;
27 }
28 in_bounds->SetDefinition(result);
29 in_bounds->Invalidate();
30}
31
32Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) {
33 return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask);
34}
35
36Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) {
37 return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id,
38 ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask));
39}
40
41Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) {
42 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
43 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
44 return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask);
45}
46
47Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
48 return ctx.OpSelect(ctx.U32[1], in_range,
49 ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
50}
51} // Anonymous namespace
52
53Id EmitLaneId(EmitContext& ctx) {
54 const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
55 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
56 return id;
57 }
58 return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U));
59}
60
61Id EmitVoteAll(EmitContext& ctx, Id pred) {
62 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
63 return ctx.OpSubgroupAllKHR(ctx.U1, pred);
64 }
65 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
66 const Id active_mask{WarpExtract(ctx, mask_ballot)};
67 const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
68 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
69 return ctx.OpIEqual(ctx.U1, lhs, active_mask);
70}
71
72Id EmitVoteAny(EmitContext& ctx, Id pred) {
73 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
74 return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
75 }
76 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
77 const Id active_mask{WarpExtract(ctx, mask_ballot)};
78 const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
79 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
80 return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
81}
82
83Id EmitVoteEqual(EmitContext& ctx, Id pred) {
84 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
85 return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
86 }
87 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
88 const Id active_mask{WarpExtract(ctx, mask_ballot)};
89 const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
90 const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
91 return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
92 ctx.OpIEqual(ctx.U1, lhs, active_mask));
93}
94
95Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
96 const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
97 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
98 return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
99 }
100 return WarpExtract(ctx, ballot);
101}
102
103Id EmitSubgroupEqMask(EmitContext& ctx) {
104 return LoadMask(ctx, ctx.subgroup_mask_eq);
105}
106
107Id EmitSubgroupLtMask(EmitContext& ctx) {
108 return LoadMask(ctx, ctx.subgroup_mask_lt);
109}
110
111Id EmitSubgroupLeMask(EmitContext& ctx) {
112 return LoadMask(ctx, ctx.subgroup_mask_le);
113}
114
115Id EmitSubgroupGtMask(EmitContext& ctx) {
116 return LoadMask(ctx, ctx.subgroup_mask_gt);
117}
118
119Id EmitSubgroupGeMask(EmitContext& ctx) {
120 return LoadMask(ctx, ctx.subgroup_mask_ge);
121}
122
123Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
124 Id segmentation_mask) {
125 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
126 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
127 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
128 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
129
130 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
131 const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
132 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
133
134 SetInBoundsFlag(inst, in_range);
135 return SelectValue(ctx, in_range, value, src_thread_id);
136}
137
138Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
139 Id segmentation_mask) {
140 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
141 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
142 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
143 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
144
145 SetInBoundsFlag(inst, in_range);
146 return SelectValue(ctx, in_range, value, src_thread_id);
147}
148
149Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
150 Id segmentation_mask) {
151 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
152 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
153 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
154 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
155
156 SetInBoundsFlag(inst, in_range);
157 return SelectValue(ctx, in_range, value, src_thread_id);
158}
159
160Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
161 Id segmentation_mask) {
162 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
163 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
164 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
165 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
166
167 SetInBoundsFlag(inst, in_range);
168 return SelectValue(ctx, in_range, value, src_thread_id);
169}
170
171Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
172 const Id three{ctx.Const(3U)};
173 Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
174 mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
175 mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U));
176 mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask);
177 mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
178
179 const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)};
180 const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)};
181
182 const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)};
183 const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)};
184 return ctx.OpFAdd(ctx.F32[1], result_a, result_b);
185}
186
187Id EmitDPdxFine(EmitContext& ctx, Id op_a) {
188 return ctx.OpDPdxFine(ctx.F32[1], op_a);
189}
190
191Id EmitDPdyFine(EmitContext& ctx, Id op_a) {
192 return ctx.OpDPdyFine(ctx.F32[1], op_a);
193}
194
195Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) {
196 return ctx.OpDPdxCoarse(ctx.F32[1], op_a);
197}
198
199Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) {
200 return ctx.OpDPdyCoarse(ctx.F32[1], op_a);
201}
202
203} // namespace Shader::Backend::SPIRV
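
The shuffle emitters above build a source thread id with plain bit arithmetic before clamping it to the caller's segment. A small sketch, not part of the diff, that evaluates the same ComputeMinThreadId/ComputeMaxThreadId expressions on host integers; the concrete values are made up for illustration:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t thread_id = 13;           // subgroup-local invocation id
    const std::uint32_t segmentation_mask = 0x18; // example segmentation mask
    const std::uint32_t clamp = 0x07;             // example clamp value

    const std::uint32_t not_seg_mask = ~segmentation_mask;
    const std::uint32_t min_thread_id = thread_id & segmentation_mask;           // ComputeMinThreadId
    const std::uint32_t max_thread_id = min_thread_id | (clamp & not_seg_mask);  // ComputeMaxThreadId

    std::printf("min=%u max=%u\n", min_thread_id, max_thread_id); // prints min=8 max=15
    return 0;
}
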
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
new file mode 100644
index 000000000..8369d0d84
--- /dev/null
+++ b/src/shader_recompiler/environment.h
@@ -0,0 +1,53 @@
1#pragma once
2
3#include <array>
4
5#include "common/common_types.h"
6#include "shader_recompiler/program_header.h"
7#include "shader_recompiler/shader_info.h"
8#include "shader_recompiler/stage.h"
9
10namespace Shader {
11
12class Environment {
13public:
14 virtual ~Environment() = default;
15
16 [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
17
18 [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0;
19
20 [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
21
22 [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
23
24 [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
25
26 [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
27
28 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
29
30 [[nodiscard]] const ProgramHeader& SPH() const noexcept {
31 return sph;
32 }
33
34 [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept {
35 return gp_passthrough_mask;
36 }
37
38 [[nodiscard]] Stage ShaderStage() const noexcept {
39 return stage;
40 }
41
42 [[nodiscard]] u32 StartAddress() const noexcept {
43 return start_address;
44 }
45
46protected:
47 ProgramHeader sph{};
48 std::array<u32, 8> gp_passthrough_mask{};
49 Stage stage{};
50 u32 start_address{};
51};
52
53} // namespace Shader
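
Environment is the pure interface backends query for guest instructions and pipeline state. A minimal sketch, not part of the diff, of a hypothetical stub implementation (the class name is invented, e.g. for tests; TextureType is assumed to come from the included shader_info.h):

#include "shader_recompiler/environment.h"

namespace Shader {

class NullEnvironment final : public Environment {
public:
    u64 ReadInstruction(u32 /*address*/) override {
        return 0; // a real implementation fetches 64-bit guest instructions here
    }
    u32 ReadCbufValue(u32 /*cbuf_index*/, u32 /*cbuf_offset*/) override {
        return 0;
    }
    TextureType ReadTextureType(u32 /*raw_handle*/) override {
        return TextureType{}; // default-constructed enum value
    }
    u32 TextureBoundBuffer() const override {
        return 0;
    }
    u32 LocalMemorySize() const override {
        return 0;
    }
    u32 SharedMemorySize() const override {
        return 0;
    }
    std::array<u32, 3> WorkgroupSize() const override {
        return {1, 1, 1};
    }
};

} // namespace Shader
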
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
new file mode 100644
index 000000000..337e7f0c8
--- /dev/null
+++ b/src/shader_recompiler/exception.h
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <stdexcept>
8#include <string>
9#include <string_view>
10#include <utility>
11
12#include <fmt/format.h>
13
14namespace Shader {
15
16class Exception : public std::exception {
17public:
18 explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
19
20 const char* what() const noexcept override {
21 return err_message.c_str();
22 }
23
24 void Prepend(std::string_view prepend) {
25 err_message.insert(0, prepend);
26 }
27
28 void Append(std::string_view append) {
29 err_message += append;
30 }
31
32private:
33 std::string err_message;
34};
35
36class LogicError : public Exception {
37public:
38 template <typename... Args>
39 LogicError(const char* message, Args&&... args)
40 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
41};
42
43class RuntimeError : public Exception {
44public:
45 template <typename... Args>
46 RuntimeError(const char* message, Args&&... args)
47 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
48};
49
50class NotImplementedException : public Exception {
51public:
52 template <typename... Args>
53 NotImplementedException(const char* message, Args&&... args)
54 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
55 Append(" is not implemented");
56 }
57};
58
59class InvalidArgument : public Exception {
60public:
61 template <typename... Args>
62 InvalidArgument(const char* message, Args&&... args)
63 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
64};
65
66} // namespace Shader
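
All of these exception types take an fmt format string plus arguments; NotImplementedException additionally appends " is not implemented" to the formatted message. A short usage sketch, not part of the diff:

#include <cstdio>

#include "shader_recompiler/exception.h"

int main() {
    try {
        throw Shader::NotImplementedException("Opcode {}", 0x24);
    } catch (const Shader::Exception& e) {
        std::puts(e.what()); // "Opcode 36 is not implemented"
    }
    try {
        throw Shader::InvalidArgument("Invalid bit size {}", 48);
    } catch (const Shader::Exception& e) {
        std::puts(e.what()); // "Invalid bit size 48"
    }
    return 0;
}
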
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
new file mode 100644
index 000000000..b61773487
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::IR {
12
13class Block;
14
15struct AbstractSyntaxNode {
16 enum class Type {
17 Block,
18 If,
19 EndIf,
20 Loop,
21 Repeat,
22 Break,
23 Return,
24 Unreachable,
25 };
26 union Data {
27 Block* block;
28 struct {
29 U1 cond;
30 Block* body;
31 Block* merge;
32 } if_node;
33 struct {
34 Block* merge;
35 } end_if;
36 struct {
37 Block* body;
38 Block* continue_block;
39 Block* merge;
40 } loop;
41 struct {
42 U1 cond;
43 Block* loop_header;
44 Block* merge;
45 } repeat;
46 struct {
47 U1 cond;
48 Block* merge;
49 Block* skip;
50 } break_node;
51 };
52
53 Data data{};
54 Type type{};
55};
56using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
57
58} // namespace Shader::IR
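
Each AbstractSyntaxNode is a tagged union: type selects which Data member is meaningful. A sketch, not part of the diff, of how an if/endif skeleton could be assembled; the helper name is hypothetical:

#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"

namespace Shader::IR {

// Builds the node sequence for: if (cond) { body } merge
AbstractSyntaxList MakeIfSkeleton(Block* body, Block* merge, U1 cond) {
    AbstractSyntaxList list;

    AbstractSyntaxNode if_node{};
    if_node.type = AbstractSyntaxNode::Type::If;
    if_node.data.if_node = {.cond = cond, .body = body, .merge = merge};
    list.push_back(if_node);

    AbstractSyntaxNode body_node{};
    body_node.type = AbstractSyntaxNode::Type::Block;
    body_node.data.block = body;
    list.push_back(body_node);

    AbstractSyntaxNode end_node{};
    end_node.type = AbstractSyntaxNode::Type::EndIf;
    end_node.data.end_if = {.merge = merge};
    list.push_back(end_node);

    return list;
}

} // namespace Shader::IR
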
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
new file mode 100644
index 000000000..4d0b8b8e5
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -0,0 +1,454 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <fmt/format.h>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/attribute.h"
9
10namespace Shader::IR {
11
12bool IsGeneric(Attribute attribute) noexcept {
13 return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
14}
15
16u32 GenericAttributeIndex(Attribute attribute) {
17 if (!IsGeneric(attribute)) {
18 throw InvalidArgument("Attribute is not generic {}", attribute);
19 }
20 return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
21}
22
23u32 GenericAttributeElement(Attribute attribute) {
24 if (!IsGeneric(attribute)) {
25 throw InvalidArgument("Attribute is not generic {}", attribute);
26 }
27 return static_cast<u32>(attribute) % 4;
28}
29
30std::string NameOf(Attribute attribute) {
31 switch (attribute) {
32 case Attribute::PrimitiveId:
33 return "PrimitiveId";
34 case Attribute::Layer:
35 return "Layer";
36 case Attribute::ViewportIndex:
37 return "ViewportIndex";
38 case Attribute::PointSize:
39 return "PointSize";
40 case Attribute::PositionX:
41 return "Position.X";
42 case Attribute::PositionY:
43 return "Position.Y";
44 case Attribute::PositionZ:
45 return "Position.Z";
46 case Attribute::PositionW:
47 return "Position.W";
48 case Attribute::Generic0X:
49 return "Generic[0].X";
50 case Attribute::Generic0Y:
51 return "Generic[0].Y";
52 case Attribute::Generic0Z:
53 return "Generic[0].Z";
54 case Attribute::Generic0W:
55 return "Generic[0].W";
56 case Attribute::Generic1X:
57 return "Generic[1].X";
58 case Attribute::Generic1Y:
59 return "Generic[1].Y";
60 case Attribute::Generic1Z:
61 return "Generic[1].Z";
62 case Attribute::Generic1W:
63 return "Generic[1].W";
64 case Attribute::Generic2X:
65 return "Generic[2].X";
66 case Attribute::Generic2Y:
67 return "Generic[2].Y";
68 case Attribute::Generic2Z:
69 return "Generic[2].Z";
70 case Attribute::Generic2W:
71 return "Generic[2].W";
72 case Attribute::Generic3X:
73 return "Generic[3].X";
74 case Attribute::Generic3Y:
75 return "Generic[3].Y";
76 case Attribute::Generic3Z:
77 return "Generic[3].Z";
78 case Attribute::Generic3W:
79 return "Generic[3].W";
80 case Attribute::Generic4X:
81 return "Generic[4].X";
82 case Attribute::Generic4Y:
83 return "Generic[4].Y";
84 case Attribute::Generic4Z:
85 return "Generic[4].Z";
86 case Attribute::Generic4W:
87 return "Generic[4].W";
88 case Attribute::Generic5X:
89 return "Generic[5].X";
90 case Attribute::Generic5Y:
91 return "Generic[5].Y";
92 case Attribute::Generic5Z:
93 return "Generic[5].Z";
94 case Attribute::Generic5W:
95 return "Generic[5].W";
96 case Attribute::Generic6X:
97 return "Generic[6].X";
98 case Attribute::Generic6Y:
99 return "Generic[6].Y";
100 case Attribute::Generic6Z:
101 return "Generic[6].Z";
102 case Attribute::Generic6W:
103 return "Generic[6].W";
104 case Attribute::Generic7X:
105 return "Generic[7].X";
106 case Attribute::Generic7Y:
107 return "Generic[7].Y";
108 case Attribute::Generic7Z:
109 return "Generic[7].Z";
110 case Attribute::Generic7W:
111 return "Generic[7].W";
112 case Attribute::Generic8X:
113 return "Generic[8].X";
114 case Attribute::Generic8Y:
115 return "Generic[8].Y";
116 case Attribute::Generic8Z:
117 return "Generic[8].Z";
118 case Attribute::Generic8W:
119 return "Generic[8].W";
120 case Attribute::Generic9X:
121 return "Generic[9].X";
122 case Attribute::Generic9Y:
123 return "Generic[9].Y";
124 case Attribute::Generic9Z:
125 return "Generic[9].Z";
126 case Attribute::Generic9W:
127 return "Generic[9].W";
128 case Attribute::Generic10X:
129 return "Generic[10].X";
130 case Attribute::Generic10Y:
131 return "Generic[10].Y";
132 case Attribute::Generic10Z:
133 return "Generic[10].Z";
134 case Attribute::Generic10W:
135 return "Generic[10].W";
136 case Attribute::Generic11X:
137 return "Generic[11].X";
138 case Attribute::Generic11Y:
139 return "Generic[11].Y";
140 case Attribute::Generic11Z:
141 return "Generic[11].Z";
142 case Attribute::Generic11W:
143 return "Generic[11].W";
144 case Attribute::Generic12X:
145 return "Generic[12].X";
146 case Attribute::Generic12Y:
147 return "Generic[12].Y";
148 case Attribute::Generic12Z:
149 return "Generic[12].Z";
150 case Attribute::Generic12W:
151 return "Generic[12].W";
152 case Attribute::Generic13X:
153 return "Generic[13].X";
154 case Attribute::Generic13Y:
155 return "Generic[13].Y";
156 case Attribute::Generic13Z:
157 return "Generic[13].Z";
158 case Attribute::Generic13W:
159 return "Generic[13].W";
160 case Attribute::Generic14X:
161 return "Generic[14].X";
162 case Attribute::Generic14Y:
163 return "Generic[14].Y";
164 case Attribute::Generic14Z:
165 return "Generic[14].Z";
166 case Attribute::Generic14W:
167 return "Generic[14].W";
168 case Attribute::Generic15X:
169 return "Generic[15].X";
170 case Attribute::Generic15Y:
171 return "Generic[15].Y";
172 case Attribute::Generic15Z:
173 return "Generic[15].Z";
174 case Attribute::Generic15W:
175 return "Generic[15].W";
176 case Attribute::Generic16X:
177 return "Generic[16].X";
178 case Attribute::Generic16Y:
179 return "Generic[16].Y";
180 case Attribute::Generic16Z:
181 return "Generic[16].Z";
182 case Attribute::Generic16W:
183 return "Generic[16].W";
184 case Attribute::Generic17X:
185 return "Generic[17].X";
186 case Attribute::Generic17Y:
187 return "Generic[17].Y";
188 case Attribute::Generic17Z:
189 return "Generic[17].Z";
190 case Attribute::Generic17W:
191 return "Generic[17].W";
192 case Attribute::Generic18X:
193 return "Generic[18].X";
194 case Attribute::Generic18Y:
195 return "Generic[18].Y";
196 case Attribute::Generic18Z:
197 return "Generic[18].Z";
198 case Attribute::Generic18W:
199 return "Generic[18].W";
200 case Attribute::Generic19X:
201 return "Generic[19].X";
202 case Attribute::Generic19Y:
203 return "Generic[19].Y";
204 case Attribute::Generic19Z:
205 return "Generic[19].Z";
206 case Attribute::Generic19W:
207 return "Generic[19].W";
208 case Attribute::Generic20X:
209 return "Generic[20].X";
210 case Attribute::Generic20Y:
211 return "Generic[20].Y";
212 case Attribute::Generic20Z:
213 return "Generic[20].Z";
214 case Attribute::Generic20W:
215 return "Generic[20].W";
216 case Attribute::Generic21X:
217 return "Generic[21].X";
218 case Attribute::Generic21Y:
219 return "Generic[21].Y";
220 case Attribute::Generic21Z:
221 return "Generic[21].Z";
222 case Attribute::Generic21W:
223 return "Generic[21].W";
224 case Attribute::Generic22X:
225 return "Generic[22].X";
226 case Attribute::Generic22Y:
227 return "Generic[22].Y";
228 case Attribute::Generic22Z:
229 return "Generic[22].Z";
230 case Attribute::Generic22W:
231 return "Generic[22].W";
232 case Attribute::Generic23X:
233 return "Generic[23].X";
234 case Attribute::Generic23Y:
235 return "Generic[23].Y";
236 case Attribute::Generic23Z:
237 return "Generic[23].Z";
238 case Attribute::Generic23W:
239 return "Generic[23].W";
240 case Attribute::Generic24X:
241 return "Generic[24].X";
242 case Attribute::Generic24Y:
243 return "Generic[24].Y";
244 case Attribute::Generic24Z:
245 return "Generic[24].Z";
246 case Attribute::Generic24W:
247 return "Generic[24].W";
248 case Attribute::Generic25X:
249 return "Generic[25].X";
250 case Attribute::Generic25Y:
251 return "Generic[25].Y";
252 case Attribute::Generic25Z:
253 return "Generic[25].Z";
254 case Attribute::Generic25W:
255 return "Generic[25].W";
256 case Attribute::Generic26X:
257 return "Generic[26].X";
258 case Attribute::Generic26Y:
259 return "Generic[26].Y";
260 case Attribute::Generic26Z:
261 return "Generic[26].Z";
262 case Attribute::Generic26W:
263 return "Generic[26].W";
264 case Attribute::Generic27X:
265 return "Generic[27].X";
266 case Attribute::Generic27Y:
267 return "Generic[27].Y";
268 case Attribute::Generic27Z:
269 return "Generic[27].Z";
270 case Attribute::Generic27W:
271 return "Generic[27].W";
272 case Attribute::Generic28X:
273 return "Generic[28].X";
274 case Attribute::Generic28Y:
275 return "Generic[28].Y";
276 case Attribute::Generic28Z:
277 return "Generic[28].Z";
278 case Attribute::Generic28W:
279 return "Generic[28].W";
280 case Attribute::Generic29X:
281 return "Generic[29].X";
282 case Attribute::Generic29Y:
283 return "Generic[29].Y";
284 case Attribute::Generic29Z:
285 return "Generic[29].Z";
286 case Attribute::Generic29W:
287 return "Generic[29].W";
288 case Attribute::Generic30X:
289 return "Generic[30].X";
290 case Attribute::Generic30Y:
291 return "Generic[30].Y";
292 case Attribute::Generic30Z:
293 return "Generic[30].Z";
294 case Attribute::Generic30W:
295 return "Generic[30].W";
296 case Attribute::Generic31X:
297 return "Generic[31].X";
298 case Attribute::Generic31Y:
299 return "Generic[31].Y";
300 case Attribute::Generic31Z:
301 return "Generic[31].Z";
302 case Attribute::Generic31W:
303 return "Generic[31].W";
304 case Attribute::ColorFrontDiffuseR:
305 return "ColorFrontDiffuse.R";
306 case Attribute::ColorFrontDiffuseG:
307 return "ColorFrontDiffuse.G";
308 case Attribute::ColorFrontDiffuseB:
309 return "ColorFrontDiffuse.B";
310 case Attribute::ColorFrontDiffuseA:
311 return "ColorFrontDiffuse.A";
312 case Attribute::ColorFrontSpecularR:
313 return "ColorFrontSpecular.R";
314 case Attribute::ColorFrontSpecularG:
315 return "ColorFrontSpecular.G";
316 case Attribute::ColorFrontSpecularB:
317 return "ColorFrontSpecular.B";
318 case Attribute::ColorFrontSpecularA:
319 return "ColorFrontSpecular.A";
320 case Attribute::ColorBackDiffuseR:
321 return "ColorBackDiffuse.R";
322 case Attribute::ColorBackDiffuseG:
323 return "ColorBackDiffuse.G";
324 case Attribute::ColorBackDiffuseB:
325 return "ColorBackDiffuse.B";
326 case Attribute::ColorBackDiffuseA:
327 return "ColorBackDiffuse.A";
328 case Attribute::ColorBackSpecularR:
329 return "ColorBackSpecular.R";
330 case Attribute::ColorBackSpecularG:
331 return "ColorBackSpecular.G";
332 case Attribute::ColorBackSpecularB:
333 return "ColorBackSpecular.B";
334 case Attribute::ColorBackSpecularA:
335 return "ColorBackSpecular.A";
336 case Attribute::ClipDistance0:
337 return "ClipDistance[0]";
338 case Attribute::ClipDistance1:
339 return "ClipDistance[1]";
340 case Attribute::ClipDistance2:
341 return "ClipDistance[2]";
342 case Attribute::ClipDistance3:
343 return "ClipDistance[3]";
344 case Attribute::ClipDistance4:
345 return "ClipDistance[4]";
346 case Attribute::ClipDistance5:
347 return "ClipDistance[5]";
348 case Attribute::ClipDistance6:
349 return "ClipDistance[6]";
350 case Attribute::ClipDistance7:
351 return "ClipDistance[7]";
352 case Attribute::PointSpriteS:
353 return "PointSprite.S";
354 case Attribute::PointSpriteT:
355 return "PointSprite.T";
356 case Attribute::FogCoordinate:
357 return "FogCoordinate";
358 case Attribute::TessellationEvaluationPointU:
359 return "TessellationEvaluationPoint.U";
360 case Attribute::TessellationEvaluationPointV:
361 return "TessellationEvaluationPoint.V";
362 case Attribute::InstanceId:
363 return "InstanceId";
364 case Attribute::VertexId:
365 return "VertexId";
366 case Attribute::FixedFncTexture0S:
367 return "FixedFncTexture[0].S";
368 case Attribute::FixedFncTexture0T:
369 return "FixedFncTexture[0].T";
370 case Attribute::FixedFncTexture0R:
371 return "FixedFncTexture[0].R";
372 case Attribute::FixedFncTexture0Q:
373 return "FixedFncTexture[0].Q";
374 case Attribute::FixedFncTexture1S:
375 return "FixedFncTexture[1].S";
376 case Attribute::FixedFncTexture1T:
377 return "FixedFncTexture[1].T";
378 case Attribute::FixedFncTexture1R:
379 return "FixedFncTexture[1].R";
380 case Attribute::FixedFncTexture1Q:
381 return "FixedFncTexture[1].Q";
382 case Attribute::FixedFncTexture2S:
383 return "FixedFncTexture[2].S";
384 case Attribute::FixedFncTexture2T:
385 return "FixedFncTexture[2].T";
386 case Attribute::FixedFncTexture2R:
387 return "FixedFncTexture[2].R";
388 case Attribute::FixedFncTexture2Q:
389 return "FixedFncTexture[2].Q";
390 case Attribute::FixedFncTexture3S:
391 return "FixedFncTexture[3].S";
392 case Attribute::FixedFncTexture3T:
393 return "FixedFncTexture[3].T";
394 case Attribute::FixedFncTexture3R:
395 return "FixedFncTexture[3].R";
396 case Attribute::FixedFncTexture3Q:
397 return "FixedFncTexture[3].Q";
398 case Attribute::FixedFncTexture4S:
399 return "FixedFncTexture[4].S";
400 case Attribute::FixedFncTexture4T:
401 return "FixedFncTexture[4].T";
402 case Attribute::FixedFncTexture4R:
403 return "FixedFncTexture[4].R";
404 case Attribute::FixedFncTexture4Q:
405 return "FixedFncTexture[4].Q";
406 case Attribute::FixedFncTexture5S:
407 return "FixedFncTexture[5].S";
408 case Attribute::FixedFncTexture5T:
409 return "FixedFncTexture[5].T";
410 case Attribute::FixedFncTexture5R:
411 return "FixedFncTexture[5].R";
412 case Attribute::FixedFncTexture5Q:
413 return "FixedFncTexture[5].Q";
414 case Attribute::FixedFncTexture6S:
415 return "FixedFncTexture[6].S";
416 case Attribute::FixedFncTexture6T:
417 return "FixedFncTexture[6].T";
418 case Attribute::FixedFncTexture6R:
419 return "FixedFncTexture[6].R";
420 case Attribute::FixedFncTexture6Q:
421 return "FixedFncTexture[6].Q";
422 case Attribute::FixedFncTexture7S:
423 return "FixedFncTexture[7].S";
424 case Attribute::FixedFncTexture7T:
425 return "FixedFncTexture[7].T";
426 case Attribute::FixedFncTexture7R:
427 return "FixedFncTexture[7].R";
428 case Attribute::FixedFncTexture7Q:
429 return "FixedFncTexture[7].Q";
430 case Attribute::FixedFncTexture8S:
431 return "FixedFncTexture[8].S";
432 case Attribute::FixedFncTexture8T:
433 return "FixedFncTexture[8].T";
434 case Attribute::FixedFncTexture8R:
435 return "FixedFncTexture[8].R";
436 case Attribute::FixedFncTexture8Q:
437 return "FixedFncTexture[8].Q";
438 case Attribute::FixedFncTexture9S:
439 return "FixedFncTexture[9].S";
440 case Attribute::FixedFncTexture9T:
441 return "FixedFncTexture[9].T";
442 case Attribute::FixedFncTexture9R:
443 return "FixedFncTexture[9].R";
444 case Attribute::FixedFncTexture9Q:
445 return "FixedFncTexture[9].Q";
446 case Attribute::ViewportMask:
447 return "ViewportMask";
448 case Attribute::FrontFace:
449 return "FrontFace";
450 }
451 return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
452}
453
454} // namespace Shader::IR
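
GenericAttributeIndex and GenericAttributeElement simply decompose the raw attribute value: generic attributes start at 32 and each generic occupies four consecutive slots (X, Y, Z, W). A quick sketch, not part of the diff, on a concrete value:

#include <cstdio>

#include "shader_recompiler/frontend/ir/attribute.h"

int main() {
    using Shader::IR::Attribute;
    const Attribute attr = Attribute::Generic3Y; // raw value 45 (see attribute.h)
    std::printf("index=%u element=%u name=%s\n",
                Shader::IR::GenericAttributeIndex(attr),    // (45 - 32) / 4 = 3
                Shader::IR::GenericAttributeElement(attr),  // 45 % 4 = 1 (Y)
                Shader::IR::NameOf(attr).c_str());          // "Generic[3].Y"
    return 0;
}
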
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
new file mode 100644
index 000000000..ca1199494
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -0,0 +1,250 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9#include "common/common_types.h"
10
11namespace Shader::IR {
12
13enum class Attribute : u64 {
14 PrimitiveId = 24,
15 Layer = 25,
16 ViewportIndex = 26,
17 PointSize = 27,
18 PositionX = 28,
19 PositionY = 29,
20 PositionZ = 30,
21 PositionW = 31,
22 Generic0X = 32,
23 Generic0Y = 33,
24 Generic0Z = 34,
25 Generic0W = 35,
26 Generic1X = 36,
27 Generic1Y = 37,
28 Generic1Z = 38,
29 Generic1W = 39,
30 Generic2X = 40,
31 Generic2Y = 41,
32 Generic2Z = 42,
33 Generic2W = 43,
34 Generic3X = 44,
35 Generic3Y = 45,
36 Generic3Z = 46,
37 Generic3W = 47,
38 Generic4X = 48,
39 Generic4Y = 49,
40 Generic4Z = 50,
41 Generic4W = 51,
42 Generic5X = 52,
43 Generic5Y = 53,
44 Generic5Z = 54,
45 Generic5W = 55,
46 Generic6X = 56,
47 Generic6Y = 57,
48 Generic6Z = 58,
49 Generic6W = 59,
50 Generic7X = 60,
51 Generic7Y = 61,
52 Generic7Z = 62,
53 Generic7W = 63,
54 Generic8X = 64,
55 Generic8Y = 65,
56 Generic8Z = 66,
57 Generic8W = 67,
58 Generic9X = 68,
59 Generic9Y = 69,
60 Generic9Z = 70,
61 Generic9W = 71,
62 Generic10X = 72,
63 Generic10Y = 73,
64 Generic10Z = 74,
65 Generic10W = 75,
66 Generic11X = 76,
67 Generic11Y = 77,
68 Generic11Z = 78,
69 Generic11W = 79,
70 Generic12X = 80,
71 Generic12Y = 81,
72 Generic12Z = 82,
73 Generic12W = 83,
74 Generic13X = 84,
75 Generic13Y = 85,
76 Generic13Z = 86,
77 Generic13W = 87,
78 Generic14X = 88,
79 Generic14Y = 89,
80 Generic14Z = 90,
81 Generic14W = 91,
82 Generic15X = 92,
83 Generic15Y = 93,
84 Generic15Z = 94,
85 Generic15W = 95,
86 Generic16X = 96,
87 Generic16Y = 97,
88 Generic16Z = 98,
89 Generic16W = 99,
90 Generic17X = 100,
91 Generic17Y = 101,
92 Generic17Z = 102,
93 Generic17W = 103,
94 Generic18X = 104,
95 Generic18Y = 105,
96 Generic18Z = 106,
97 Generic18W = 107,
98 Generic19X = 108,
99 Generic19Y = 109,
100 Generic19Z = 110,
101 Generic19W = 111,
102 Generic20X = 112,
103 Generic20Y = 113,
104 Generic20Z = 114,
105 Generic20W = 115,
106 Generic21X = 116,
107 Generic21Y = 117,
108 Generic21Z = 118,
109 Generic21W = 119,
110 Generic22X = 120,
111 Generic22Y = 121,
112 Generic22Z = 122,
113 Generic22W = 123,
114 Generic23X = 124,
115 Generic23Y = 125,
116 Generic23Z = 126,
117 Generic23W = 127,
118 Generic24X = 128,
119 Generic24Y = 129,
120 Generic24Z = 130,
121 Generic24W = 131,
122 Generic25X = 132,
123 Generic25Y = 133,
124 Generic25Z = 134,
125 Generic25W = 135,
126 Generic26X = 136,
127 Generic26Y = 137,
128 Generic26Z = 138,
129 Generic26W = 139,
130 Generic27X = 140,
131 Generic27Y = 141,
132 Generic27Z = 142,
133 Generic27W = 143,
134 Generic28X = 144,
135 Generic28Y = 145,
136 Generic28Z = 146,
137 Generic28W = 147,
138 Generic29X = 148,
139 Generic29Y = 149,
140 Generic29Z = 150,
141 Generic29W = 151,
142 Generic30X = 152,
143 Generic30Y = 153,
144 Generic30Z = 154,
145 Generic30W = 155,
146 Generic31X = 156,
147 Generic31Y = 157,
148 Generic31Z = 158,
149 Generic31W = 159,
150 ColorFrontDiffuseR = 160,
151 ColorFrontDiffuseG = 161,
152 ColorFrontDiffuseB = 162,
153 ColorFrontDiffuseA = 163,
154 ColorFrontSpecularR = 164,
155 ColorFrontSpecularG = 165,
156 ColorFrontSpecularB = 166,
157 ColorFrontSpecularA = 167,
158 ColorBackDiffuseR = 168,
159 ColorBackDiffuseG = 169,
160 ColorBackDiffuseB = 170,
161 ColorBackDiffuseA = 171,
162 ColorBackSpecularR = 172,
163 ColorBackSpecularG = 173,
164 ColorBackSpecularB = 174,
165 ColorBackSpecularA = 175,
166 ClipDistance0 = 176,
167 ClipDistance1 = 177,
168 ClipDistance2 = 178,
169 ClipDistance3 = 179,
170 ClipDistance4 = 180,
171 ClipDistance5 = 181,
172 ClipDistance6 = 182,
173 ClipDistance7 = 183,
174 PointSpriteS = 184,
175 PointSpriteT = 185,
176 FogCoordinate = 186,
177 TessellationEvaluationPointU = 188,
178 TessellationEvaluationPointV = 189,
179 InstanceId = 190,
180 VertexId = 191,
181 FixedFncTexture0S = 192,
182 FixedFncTexture0T = 193,
183 FixedFncTexture0R = 194,
184 FixedFncTexture0Q = 195,
185 FixedFncTexture1S = 196,
186 FixedFncTexture1T = 197,
187 FixedFncTexture1R = 198,
188 FixedFncTexture1Q = 199,
189 FixedFncTexture2S = 200,
190 FixedFncTexture2T = 201,
191 FixedFncTexture2R = 202,
192 FixedFncTexture2Q = 203,
193 FixedFncTexture3S = 204,
194 FixedFncTexture3T = 205,
195 FixedFncTexture3R = 206,
196 FixedFncTexture3Q = 207,
197 FixedFncTexture4S = 208,
198 FixedFncTexture4T = 209,
199 FixedFncTexture4R = 210,
200 FixedFncTexture4Q = 211,
201 FixedFncTexture5S = 212,
202 FixedFncTexture5T = 213,
203 FixedFncTexture5R = 214,
204 FixedFncTexture5Q = 215,
205 FixedFncTexture6S = 216,
206 FixedFncTexture6T = 217,
207 FixedFncTexture6R = 218,
208 FixedFncTexture6Q = 219,
209 FixedFncTexture7S = 220,
210 FixedFncTexture7T = 221,
211 FixedFncTexture7R = 222,
212 FixedFncTexture7Q = 223,
213 FixedFncTexture8S = 224,
214 FixedFncTexture8T = 225,
215 FixedFncTexture8R = 226,
216 FixedFncTexture8Q = 227,
217 FixedFncTexture9S = 228,
218 FixedFncTexture9T = 229,
219 FixedFncTexture9R = 230,
220 FixedFncTexture9Q = 231,
221 ViewportMask = 232,
222 FrontFace = 255,
223};
224
225constexpr size_t NUM_GENERICS = 32;
226
227[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
228
229[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
230
231[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);
232
233[[nodiscard]] std::string NameOf(Attribute attribute);
234
235[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept {
236 return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value);
237}
238
239} // namespace Shader::IR
240
241template <>
242struct fmt::formatter<Shader::IR::Attribute> {
243 constexpr auto parse(format_parse_context& ctx) {
244 return ctx.begin();
245 }
246 template <typename FormatContext>
247 auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
248 return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
249 }
250};
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
new file mode 100644
index 000000000..7c08b25ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <initializer_list>
7#include <map>
8#include <memory>
9
10#include "common/bit_cast.h"
11#include "common/common_types.h"
12#include "shader_recompiler/frontend/ir/basic_block.h"
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
18
19Block::~Block() = default;
20
21void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
22 PrependNewInst(end(), op, args);
23}
24
25Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
26 std::initializer_list<Value> args, u32 flags) {
27 Inst* const inst{inst_pool->Create(op, flags)};
28 const auto result_it{instructions.insert(insertion_point, *inst)};
29
30 if (inst->NumArgs() != args.size()) {
31 throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
32 }
33 std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
34 inst->SetArg(index, arg);
35 ++index;
36 });
37 return result_it;
38}
39
40void Block::AddBranch(Block* block) {
41 if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
42 throw LogicError("Successor already inserted");
43 }
44 if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
45 throw LogicError("Predecessor already inserted");
46 }
47 imm_successors.push_back(block);
48 block->imm_predecessors.push_back(this);
49}
50
51static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
52 Block* block) {
53 if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
54 return fmt::format("{{Block ${}}}", it->second);
55 }
56 return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
57}
58
59static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
60 const Inst* inst) {
61 const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
62 if (is_inserted) {
63 ++inst_index;
64 }
65 return it->second;
66}
67
68static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
69 const Value& arg) {
70 if (arg.IsEmpty()) {
71 return "<null>";
72 }
73 if (!arg.IsImmediate() || arg.IsIdentity()) {
74 return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
75 }
76 switch (arg.Type()) {
77 case Type::U1:
78 return fmt::format("#{}", arg.U1() ? "true" : "false");
79 case Type::U8:
80 return fmt::format("#{}", arg.U8());
81 case Type::U16:
82 return fmt::format("#{}", arg.U16());
83 case Type::U32:
84 return fmt::format("#{}", arg.U32());
85 case Type::U64:
86 return fmt::format("#{}", arg.U64());
87 case Type::F32:
88 return fmt::format("#{}", arg.F32());
89 case Type::Reg:
90 return fmt::format("{}", arg.Reg());
91 case Type::Pred:
92 return fmt::format("{}", arg.Pred());
93 case Type::Attribute:
94 return fmt::format("{}", arg.Attribute());
95 default:
96 return "<unknown immediate type>";
97 }
98}
99
100std::string DumpBlock(const Block& block) {
101 size_t inst_index{0};
102 std::map<const Inst*, size_t> inst_to_index;
103 return DumpBlock(block, {}, inst_to_index, inst_index);
104}
105
106std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
107 std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
108 std::string ret{"Block"};
109 if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
110 ret += fmt::format(" ${}", it->second);
111 }
112 ret += '\n';
113 for (const Inst& inst : block) {
114 const Opcode op{inst.GetOpcode()};
115 ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
116 if (TypeOf(op) != Type::Void) {
117 ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
118 } else {
119 ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
120 }
121 const size_t arg_count{inst.NumArgs()};
122 for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
123 const Value arg{inst.Arg(arg_index)};
124 const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
125 ret += arg_index != 0 ? ", " : " ";
126 if (op == Opcode::Phi) {
127 ret += fmt::format("[ {}, {} ]", arg_str,
128 BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
129 } else {
130 ret += arg_str;
131 }
132 if (op != Opcode::Phi) {
133 const Type actual_type{arg.Type()};
134 const Type expected_type{ArgTypeOf(op, arg_index)};
135 if (!AreTypesCompatible(actual_type, expected_type)) {
136 ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
137 }
138 }
139 }
140 if (TypeOf(op) != Type::Void) {
141 ret += fmt::format(" (uses: {})\n", inst.UseCount());
142 } else {
143 ret += '\n';
144 }
145 }
146 return ret;
147}
148
149} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
new file mode 100644
index 000000000..7e134b4c7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -0,0 +1,185 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <initializer_list>
8#include <map>
9#include <span>
10#include <vector>
11
12#include <boost/intrusive/list.hpp>
13
14#include "common/bit_cast.h"
15#include "common/common_types.h"
16#include "shader_recompiler/frontend/ir/condition.h"
17#include "shader_recompiler/frontend/ir/value.h"
18#include "shader_recompiler/object_pool.h"
19
20namespace Shader::IR {
21
22class Block {
23public:
24 using InstructionList = boost::intrusive::list<Inst>;
25 using size_type = InstructionList::size_type;
26 using iterator = InstructionList::iterator;
27 using const_iterator = InstructionList::const_iterator;
28 using reverse_iterator = InstructionList::reverse_iterator;
29 using const_reverse_iterator = InstructionList::const_reverse_iterator;
30
31 explicit Block(ObjectPool<Inst>& inst_pool_);
32 ~Block();
33
34 Block(const Block&) = delete;
35 Block& operator=(const Block&) = delete;
36
37 Block(Block&&) = default;
38 Block& operator=(Block&&) = default;
39
40 /// Appends a new instruction to the end of this basic block.
41 void AppendNewInst(Opcode op, std::initializer_list<Value> args);
42
43 /// Prepends a new instruction to this basic block before the insertion point.
44 iterator PrependNewInst(iterator insertion_point, Opcode op,
45 std::initializer_list<Value> args = {}, u32 flags = 0);
46
47 /// Adds a new branch to this basic block.
48 void AddBranch(Block* block);
49
50 /// Gets a mutable reference to the instruction list for this basic block.
51 [[nodiscard]] InstructionList& Instructions() noexcept {
52 return instructions;
53 }
54 /// Gets an immutable reference to the instruction list for this basic block.
55 [[nodiscard]] const InstructionList& Instructions() const noexcept {
56 return instructions;
57 }
58
59 /// Gets an immutable span to the immediate predecessors.
60 [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
61 return imm_predecessors;
62 }
63 /// Gets an immutable span to the immediate successors.
64 [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
65 return imm_successors;
66 }
67
68    /// Intrusively store the host definition of this block.
69 template <typename DefinitionType>
70 void SetDefinition(DefinitionType def) {
71 definition = Common::BitCast<u32>(def);
72 }
73
74    /// Return the intrusively stored host definition of this block.
75 template <typename DefinitionType>
76 [[nodiscard]] DefinitionType Definition() const noexcept {
77 return Common::BitCast<DefinitionType>(definition);
78 }
79
80 void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
81 ssa_reg_values[RegIndex(reg)] = value;
82 }
83 const Value& SsaRegValue(IR::Reg reg) const noexcept {
84 return ssa_reg_values[RegIndex(reg)];
85 }
86
87 void SsaSeal() noexcept {
88 is_ssa_sealed = true;
89 }
90 [[nodiscard]] bool IsSsaSealed() const noexcept {
91 return is_ssa_sealed;
92 }
93
94 [[nodiscard]] bool empty() const {
95 return instructions.empty();
96 }
97 [[nodiscard]] size_type size() const {
98 return instructions.size();
99 }
100
101 [[nodiscard]] Inst& front() {
102 return instructions.front();
103 }
104 [[nodiscard]] const Inst& front() const {
105 return instructions.front();
106 }
107
108 [[nodiscard]] Inst& back() {
109 return instructions.back();
110 }
111 [[nodiscard]] const Inst& back() const {
112 return instructions.back();
113 }
114
115 [[nodiscard]] iterator begin() {
116 return instructions.begin();
117 }
118 [[nodiscard]] const_iterator begin() const {
119 return instructions.begin();
120 }
121 [[nodiscard]] iterator end() {
122 return instructions.end();
123 }
124 [[nodiscard]] const_iterator end() const {
125 return instructions.end();
126 }
127
128 [[nodiscard]] reverse_iterator rbegin() {
129 return instructions.rbegin();
130 }
131 [[nodiscard]] const_reverse_iterator rbegin() const {
132 return instructions.rbegin();
133 }
134 [[nodiscard]] reverse_iterator rend() {
135 return instructions.rend();
136 }
137 [[nodiscard]] const_reverse_iterator rend() const {
138 return instructions.rend();
139 }
140
141 [[nodiscard]] const_iterator cbegin() const {
142 return instructions.cbegin();
143 }
144 [[nodiscard]] const_iterator cend() const {
145 return instructions.cend();
146 }
147
148 [[nodiscard]] const_reverse_iterator crbegin() const {
149 return instructions.crbegin();
150 }
151 [[nodiscard]] const_reverse_iterator crend() const {
152 return instructions.crend();
153 }
154
155private:
156 /// Memory pool for instruction list
157 ObjectPool<Inst>* inst_pool;
158
159 /// List of instructions in this block
160 InstructionList instructions;
161
162 /// Block immediate predecessors
163 std::vector<Block*> imm_predecessors;
164 /// Block immediate successors
165 std::vector<Block*> imm_successors;
166
167 /// Intrusively store the value of a register in the block.
168 std::array<Value, NUM_REGS> ssa_reg_values;
169 /// Intrusively store if the block is sealed in the SSA pass.
170 bool is_ssa_sealed{false};
171
172 /// Intrusively stored host definition of this block.
173 u32 definition{};
174};
175
176using BlockList = std::vector<Block*>;
177
178[[nodiscard]] std::string DumpBlock(const Block& block);
179
180[[nodiscard]] std::string DumpBlock(const Block& block,
181 const std::map<const Block*, size_t>& block_to_index,
182 std::map<const Inst*, size_t>& inst_to_index,
183 size_t& inst_index);
184
185} // namespace Shader::IR
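
Blocks borrow their instruction storage from an external ObjectPool<Inst> and are linked explicitly with AddBranch. A minimal sketch, not part of the diff, assuming ObjectPool lives in the Shader namespace as referenced by the include above:

#include <cstdio>

#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"

int main() {
    Shader::ObjectPool<Shader::IR::Inst> inst_pool;

    Shader::IR::Block entry{inst_pool};
    Shader::IR::Block merge{inst_pool};

    // Prologue takes no arguments, so an empty initializer list matches NumArgs().
    entry.AppendNewInst(Shader::IR::Opcode::Prologue, {});
    entry.AddBranch(&merge); // records successor/predecessor edges on both blocks

    std::printf("%s", Shader::IR::DumpBlock(entry).c_str());
    return 0;
}
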
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h
new file mode 100644
index 000000000..a52ccbd58
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h
@@ -0,0 +1,56 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <type_traits>
9#include <queue>
10
11#include <boost/container/small_vector.hpp>
12
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17template <typename Pred>
18auto BreadthFirstSearch(const Value& value, Pred&& pred)
19 -> std::invoke_result_t<Pred, const Inst*> {
20 if (value.IsImmediate()) {
21        // Nothing to do for immediates
22 return std::nullopt;
23 }
24    // Breadth-first search visiting the rightmost arguments first
25    // Small vector capacity has been determined from shaders in Super Smash Bros. Ultimate
26 boost::container::small_vector<const Inst*, 2> visited;
27 std::queue<const Inst*> queue;
28 queue.push(value.InstRecursive());
29
30 while (!queue.empty()) {
31 // Pop one instruction from the queue
32 const Inst* const inst{queue.front()};
33 queue.pop();
34 if (const std::optional result = pred(inst)) {
35 // This is the instruction we were looking for
36 return result;
37 }
38        // Visit the rightmost arguments first
39 for (size_t arg = inst->NumArgs(); arg--;) {
40 const Value arg_value{inst->Arg(arg)};
41 if (arg_value.IsImmediate()) {
42 continue;
43 }
44 // Queue instruction if it hasn't been visited
45 const Inst* const arg_inst{arg_value.InstRecursive()};
46 if (std::ranges::find(visited, arg_inst) == visited.end()) {
47 visited.push_back(arg_inst);
48 queue.push(arg_inst);
49 }
50 }
51 }
52 // SSA tree has been traversed and the result hasn't been found
53 return std::nullopt;
54}
55
56} // namespace Shader::IR
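
The predicate passed to BreadthFirstSearch must return a std::optional; the traversal stops at the first instruction for which it holds a value and propagates that result. A short sketch, not part of the diff; the helper name and opcode choice are only for illustration:

#include <optional>

#include "shader_recompiler/frontend/ir/breadth_first_search.h"

// Walks the SSA operands of `value` and returns the first GetCbufU32 producer, if any.
std::optional<const Shader::IR::Inst*> FindCbufRead(const Shader::IR::Value& value) {
    const auto pred = [](const Shader::IR::Inst* inst) -> std::optional<const Shader::IR::Inst*> {
        if (inst->GetOpcode() == Shader::IR::Opcode::GetCbufU32) {
            return inst;
        }
        return std::nullopt;
    };
    return Shader::IR::BreadthFirstSearch(value, pred);
}
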
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
new file mode 100644
index 000000000..fc18ea2a2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -0,0 +1,29 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "shader_recompiler/frontend/ir/condition.h"
10
11namespace Shader::IR {
12
13std::string NameOf(Condition condition) {
14 std::string ret;
15 if (condition.GetFlowTest() != FlowTest::T) {
16 ret = fmt::to_string(condition.GetFlowTest());
17 }
18 const auto [pred, negated]{condition.GetPred()};
19 if (!ret.empty()) {
20 ret += '&';
21 }
22 if (negated) {
23 ret += '!';
24 }
25 ret += fmt::to_string(pred);
26 return ret;
27}
28
29} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
new file mode 100644
index 000000000..aa8597c60
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <string>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "shader_recompiler/frontend/ir/flow_test.h"
14#include "shader_recompiler/frontend/ir/pred.h"
15
16namespace Shader::IR {
17
18class Condition {
19public:
20 Condition() noexcept = default;
21
22 explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
23 : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
24 pred_negated{pred_negated_ ? u8{1} : u8{0}} {}
25
26 explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
27 : Condition(FlowTest::T, pred_, pred_negated_) {}
28
29 explicit Condition(bool value) : Condition(Pred::PT, !value) {}
30
31 auto operator<=>(const Condition&) const noexcept = default;
32
33 [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
34 return static_cast<IR::FlowTest>(flow_test);
35 }
36
37 [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
38 return {static_cast<IR::Pred>(pred), pred_negated != 0};
39 }
40
41private:
42 u16 flow_test;
43 u8 pred;
44 u8 pred_negated;
45};
46
47std::string NameOf(Condition condition);
48
49} // namespace Shader::IR
50
51template <>
52struct fmt::formatter<Shader::IR::Condition> {
53 constexpr auto parse(format_parse_context& ctx) {
54 return ctx.begin();
55 }
56 template <typename FormatContext>
57 auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
58 return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
59 }
60};
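
Condition packs a flow test and a possibly negated predicate into a small value type; NameOf joins the two with '&' and prefixes '!' for negation. A tiny sketch, not part of the diff; Pred::P0 and the exact predicate spelling in the output are assumptions, since pred.h and its formatter are defined elsewhere:

#include <cstdio>

#include "shader_recompiler/frontend/ir/condition.h"

int main() {
    const Shader::IR::Condition cond{Shader::IR::FlowTest::NE, Shader::IR::Pred::P0, true};
    // Assuming the Pred formatter prints "P0", this yields "NE&!P0".
    std::printf("%s\n", Shader::IR::NameOf(cond).c_str());
    return 0;
}
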
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp
new file mode 100644
index 000000000..6ebb4ad89
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.cpp
@@ -0,0 +1,83 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "shader_recompiler/frontend/ir/flow_test.h"
10
11namespace Shader::IR {
12
13std::string NameOf(FlowTest flow_test) {
14 switch (flow_test) {
15 case FlowTest::F:
16 return "F";
17 case FlowTest::LT:
18 return "LT";
19 case FlowTest::EQ:
20 return "EQ";
21 case FlowTest::LE:
22 return "LE";
23 case FlowTest::GT:
24 return "GT";
25 case FlowTest::NE:
26 return "NE";
27 case FlowTest::GE:
28 return "GE";
29 case FlowTest::NUM:
30 return "NUM";
31 case FlowTest::NaN:
32 return "NAN";
33 case FlowTest::LTU:
34 return "LTU";
35 case FlowTest::EQU:
36 return "EQU";
37 case FlowTest::LEU:
38 return "LEU";
39 case FlowTest::GTU:
40 return "GTU";
41 case FlowTest::NEU:
42 return "NEU";
43 case FlowTest::GEU:
44 return "GEU";
45 case FlowTest::T:
46 return "T";
47 case FlowTest::OFF:
48 return "OFF";
49 case FlowTest::LO:
50 return "LO";
51 case FlowTest::SFF:
52 return "SFF";
53 case FlowTest::LS:
54 return "LS";
55 case FlowTest::HI:
56 return "HI";
57 case FlowTest::SFT:
58 return "SFT";
59 case FlowTest::HS:
60 return "HS";
61 case FlowTest::OFT:
62 return "OFT";
63 case FlowTest::CSM_TA:
64 return "CSM_TA";
65 case FlowTest::CSM_TR:
66 return "CSM_TR";
67 case FlowTest::CSM_MX:
68 return "CSM_MX";
69 case FlowTest::FCSM_TA:
70 return "FCSM_TA";
71 case FlowTest::FCSM_TR:
72 return "FCSM_TR";
73 case FlowTest::FCSM_MX:
74 return "FCSM_MX";
75 case FlowTest::RLE:
76 return "RLE";
77 case FlowTest::RGT:
78 return "RGT";
79 }
80 return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
81}
82
83} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h
new file mode 100644
index 000000000..09e113773
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.h
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <fmt/format.h>
9
10#include "common/common_types.h"
11
12namespace Shader::IR {
13
14enum class FlowTest : u64 {
15 F,
16 LT,
17 EQ,
18 LE,
19 GT,
20 NE,
21 GE,
22 NUM,
23 NaN,
24 LTU,
25 EQU,
26 LEU,
27 GTU,
28 NEU,
29 GEU,
30 T,
31 OFF,
32 LO,
33 SFF,
34 LS,
35 HI,
36 SFT,
37 HS,
38 OFT,
39 CSM_TA,
40 CSM_TR,
41 CSM_MX,
42 FCSM_TA,
43 FCSM_TR,
44 FCSM_MX,
45 RLE,
46 RGT,
47};
48
49[[nodiscard]] std::string NameOf(FlowTest flow_test);
50
51} // namespace Shader::IR
52
53template <>
54struct fmt::formatter<Shader::IR::FlowTest> {
55 constexpr auto parse(format_parse_context& ctx) {
56 return ctx.begin();
57 }
58 template <typename FormatContext>
59 auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
60 return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
61 }
62};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
new file mode 100644
index 000000000..13159a68d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -0,0 +1,2017 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_cast.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::IR {
10namespace {
11[[noreturn]] void ThrowInvalidType(Type type) {
12 throw InvalidArgument("Invalid type {}", type);
13}
14
15Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
16 if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
17 return ir.CompositeConstruct(bias_lod, lod_clamp);
18 } else if (!bias_lod.IsEmpty()) {
19 return bias_lod;
20 } else if (!lod_clamp.IsEmpty()) {
21 return lod_clamp;
22 } else {
23 return Value{};
24 }
25}
26} // Anonymous namespace
27
28U1 IREmitter::Imm1(bool value) const {
29 return U1{Value{value}};
30}
31
32U8 IREmitter::Imm8(u8 value) const {
33 return U8{Value{value}};
34}
35
36U16 IREmitter::Imm16(u16 value) const {
37 return U16{Value{value}};
38}
39
40U32 IREmitter::Imm32(u32 value) const {
41 return U32{Value{value}};
42}
43
44U32 IREmitter::Imm32(s32 value) const {
45 return U32{Value{static_cast<u32>(value)}};
46}
47
48F32 IREmitter::Imm32(f32 value) const {
49 return F32{Value{value}};
50}
51
52U64 IREmitter::Imm64(u64 value) const {
53 return U64{Value{value}};
54}
55
56U64 IREmitter::Imm64(s64 value) const {
57 return U64{Value{static_cast<u64>(value)}};
58}
59
60F64 IREmitter::Imm64(f64 value) const {
61 return F64{Value{value}};
62}
63
64U1 IREmitter::ConditionRef(const U1& value) {
65 return Inst<U1>(Opcode::ConditionRef, value);
66}
67
68void IREmitter::Reference(const Value& value) {
69 Inst(Opcode::Reference, value);
70}
71
72void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
73 Inst(Opcode::PhiMove, Value{&phi}, value);
74}
75
76void IREmitter::Prologue() {
77 Inst(Opcode::Prologue);
78}
79
80void IREmitter::Epilogue() {
81 Inst(Opcode::Epilogue);
82}
83
84void IREmitter::DemoteToHelperInvocation() {
85 Inst(Opcode::DemoteToHelperInvocation);
86}
87
88void IREmitter::EmitVertex(const U32& stream) {
89 Inst(Opcode::EmitVertex, stream);
90}
91
92void IREmitter::EndPrimitive(const U32& stream) {
93 Inst(Opcode::EndPrimitive, stream);
94}
95
96void IREmitter::Barrier() {
97 Inst(Opcode::Barrier);
98}
99
100void IREmitter::WorkgroupMemoryBarrier() {
101 Inst(Opcode::WorkgroupMemoryBarrier);
102}
103
104void IREmitter::DeviceMemoryBarrier() {
105 Inst(Opcode::DeviceMemoryBarrier);
106}
107
108U32 IREmitter::GetReg(IR::Reg reg) {
109 return Inst<U32>(Opcode::GetRegister, reg);
110}
111
112void IREmitter::SetReg(IR::Reg reg, const U32& value) {
113 Inst(Opcode::SetRegister, reg, value);
114}
115
116U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
117 if (pred == Pred::PT) {
118 return Imm1(!is_negated);
119 }
120 const U1 value{Inst<U1>(Opcode::GetPred, pred)};
121 if (is_negated) {
122 return Inst<U1>(Opcode::LogicalNot, value);
123 } else {
124 return value;
125 }
126}
127
128void IREmitter::SetPred(IR::Pred pred, const U1& value) {
129 if (pred != IR::Pred::PT) {
130 Inst(Opcode::SetPred, pred, value);
131 }
132}
133
134U1 IREmitter::GetGotoVariable(u32 id) {
135 return Inst<U1>(Opcode::GetGotoVariable, id);
136}
137
138void IREmitter::SetGotoVariable(u32 id, const U1& value) {
139 Inst(Opcode::SetGotoVariable, id, value);
140}
141
142U32 IREmitter::GetIndirectBranchVariable() {
143 return Inst<U32>(Opcode::GetIndirectBranchVariable);
144}
145
146void IREmitter::SetIndirectBranchVariable(const U32& value) {
147 Inst(Opcode::SetIndirectBranchVariable, value);
148}
149
150U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
151 return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
152}
153
154Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
155 bool is_signed) {
156 switch (bitsize) {
157 case 8:
158 return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset);
159 case 16:
160 return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset);
161 case 32:
162 return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
163 case 64:
164 return Inst(Opcode::GetCbufU32x2, binding, byte_offset);
165 default:
166 throw InvalidArgument("Invalid bit size {}", bitsize);
167 }
168}
169
170F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) {
171 return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset);
172}
173
174U1 IREmitter::GetZFlag() {
175 return Inst<U1>(Opcode::GetZFlag);
176}
177
178U1 IREmitter::GetSFlag() {
179 return Inst<U1>(Opcode::GetSFlag);
180}
181
182U1 IREmitter::GetCFlag() {
183 return Inst<U1>(Opcode::GetCFlag);
184}
185
186U1 IREmitter::GetOFlag() {
187 return Inst<U1>(Opcode::GetOFlag);
188}
189
190void IREmitter::SetZFlag(const U1& value) {
191 Inst(Opcode::SetZFlag, value);
192}
193
194void IREmitter::SetSFlag(const U1& value) {
195 Inst(Opcode::SetSFlag, value);
196}
197
198void IREmitter::SetCFlag(const U1& value) {
199 Inst(Opcode::SetCFlag, value);
200}
201
202void IREmitter::SetOFlag(const U1& value) {
203 Inst(Opcode::SetOFlag, value);
204}
205
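// Translates a flow test into a boolean expression over the Z, S, C and O condition-code flags.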
206static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
207 switch (flow_test) {
208 case FlowTest::F:
209 return ir.Imm1(false);
210 case FlowTest::LT:
211 return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())),
212 ir.GetOFlag());
213 case FlowTest::EQ:
214 return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag());
215 case FlowTest::LE:
216 return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
217 case FlowTest::GT:
218 return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()),
219 ir.LogicalNot(ir.GetZFlag()));
220 case FlowTest::NE:
221 return ir.LogicalNot(ir.GetZFlag());
222 case FlowTest::GE:
223 return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()));
224 case FlowTest::NUM:
225 return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
226 case FlowTest::NaN:
227 return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
228 case FlowTest::LTU:
229 return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag());
230 case FlowTest::EQU:
231 return ir.GetZFlag();
232 case FlowTest::LEU:
233 return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag());
234 case FlowTest::GTU:
235 return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()),
236 ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
237 case FlowTest::NEU:
238 return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag()));
239 case FlowTest::GEU:
240 return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()),
241 ir.GetOFlag());
242 case FlowTest::T:
243 return ir.Imm1(true);
244 case FlowTest::OFF:
245 return ir.LogicalNot(ir.GetOFlag());
246 case FlowTest::LO:
247 return ir.LogicalNot(ir.GetCFlag());
248 case FlowTest::SFF:
249 return ir.LogicalNot(ir.GetSFlag());
250 case FlowTest::LS:
251 return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag()));
252 case FlowTest::HI:
253 return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag()));
254 case FlowTest::SFT:
255 return ir.GetSFlag();
256 case FlowTest::HS:
257 return ir.GetCFlag();
258 case FlowTest::OFT:
259 return ir.GetOFlag();
260 case FlowTest::RLE:
261 return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag());
262 case FlowTest::RGT:
263 return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
264 case FlowTest::FCSM_TR:
265 LOG_WARNING(Shader, "(STUBBED) FCSM_TR");
266 return ir.Imm1(false);
267 case FlowTest::CSM_TA:
268 case FlowTest::CSM_TR:
269 case FlowTest::CSM_MX:
270 case FlowTest::FCSM_TA:
271 case FlowTest::FCSM_MX:
272 default:
273 throw NotImplementedException("Flow test {}", flow_test);
274 }
275}
276
277U1 IREmitter::Condition(IR::Condition cond) {
278 const FlowTest flow_test{cond.GetFlowTest()};
279 const auto [pred, is_negated]{cond.GetPred()};
280 if (flow_test == FlowTest::T) {
281 return GetPred(pred, is_negated);
282 }
283 return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
284}
285
286U1 IREmitter::GetFlowTestResult(FlowTest test) {
287 return GetFlowTest(*this, test);
288}
289
290F32 IREmitter::GetAttribute(IR::Attribute attribute) {
291 return GetAttribute(attribute, Imm32(0));
292}
293
294F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
295 return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
296}
297
298void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
299 Inst(Opcode::SetAttribute, attribute, value, vertex);
300}
301
302F32 IREmitter::GetAttributeIndexed(const U32& phys_address) {
303 return GetAttributeIndexed(phys_address, Imm32(0));
304}
305
306F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) {
307 return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);
308}
309
310void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) {
311 Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
312}
313
314F32 IREmitter::GetPatch(Patch patch) {
315 return Inst<F32>(Opcode::GetPatch, patch);
316}
317
318void IREmitter::SetPatch(Patch patch, const F32& value) {
319 Inst(Opcode::SetPatch, patch, value);
320}
321
322void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
323 Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
324}
325
326void IREmitter::SetSampleMask(const U32& value) {
327 Inst(Opcode::SetSampleMask, value);
328}
329
330void IREmitter::SetFragDepth(const F32& value) {
331 Inst(Opcode::SetFragDepth, value);
332}
333
334U32 IREmitter::WorkgroupIdX() {
335 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
336}
337
338U32 IREmitter::WorkgroupIdY() {
339 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)};
340}
341
342U32 IREmitter::WorkgroupIdZ() {
343 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
344}
345
346Value IREmitter::LocalInvocationId() {
347 return Inst(Opcode::LocalInvocationId);
348}
349
350U32 IREmitter::LocalInvocationIdX() {
351 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
352}
353
354U32 IREmitter::LocalInvocationIdY() {
355 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)};
356}
357
358U32 IREmitter::LocalInvocationIdZ() {
359 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
360}
361
362U32 IREmitter::InvocationId() {
363 return Inst<U32>(Opcode::InvocationId);
364}
365
366U32 IREmitter::SampleId() {
367 return Inst<U32>(Opcode::SampleId);
368}
369
370U1 IREmitter::IsHelperInvocation() {
371 return Inst<U1>(Opcode::IsHelperInvocation);
372}
373
374F32 IREmitter::YDirection() {
375 return Inst<F32>(Opcode::YDirection);
376}
377
378U32 IREmitter::LaneId() {
379 return Inst<U32>(Opcode::LaneId);
380}
381
382U32 IREmitter::LoadGlobalU8(const U64& address) {
383 return Inst<U32>(Opcode::LoadGlobalU8, address);
384}
385
386U32 IREmitter::LoadGlobalS8(const U64& address) {
387 return Inst<U32>(Opcode::LoadGlobalS8, address);
388}
389
390U32 IREmitter::LoadGlobalU16(const U64& address) {
391 return Inst<U32>(Opcode::LoadGlobalU16, address);
392}
393
394U32 IREmitter::LoadGlobalS16(const U64& address) {
395 return Inst<U32>(Opcode::LoadGlobalS16, address);
396}
397
398U32 IREmitter::LoadGlobal32(const U64& address) {
399 return Inst<U32>(Opcode::LoadGlobal32, address);
400}
401
402Value IREmitter::LoadGlobal64(const U64& address) {
403 return Inst<Value>(Opcode::LoadGlobal64, address);
404}
405
406Value IREmitter::LoadGlobal128(const U64& address) {
407 return Inst<Value>(Opcode::LoadGlobal128, address);
408}
409
410void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
411 Inst(Opcode::WriteGlobalU8, address, value);
412}
413
414void IREmitter::WriteGlobalS8(const U64& address, const U32& value) {
415 Inst(Opcode::WriteGlobalS8, address, value);
416}
417
418void IREmitter::WriteGlobalU16(const U64& address, const U32& value) {
419 Inst(Opcode::WriteGlobalU16, address, value);
420}
421
422void IREmitter::WriteGlobalS16(const U64& address, const U32& value) {
423 Inst(Opcode::WriteGlobalS16, address, value);
424}
425
426void IREmitter::WriteGlobal32(const U64& address, const U32& value) {
427 Inst(Opcode::WriteGlobal32, address, value);
428}
429
430void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) {
431 Inst(Opcode::WriteGlobal64, address, vector);
432}
433
434void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
435 Inst(Opcode::WriteGlobal128, address, vector);
436}
437
438U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
439 return Inst<U32>(Opcode::LoadLocal, word_offset);
440}
441
442void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
443 Inst(Opcode::WriteLocal, word_offset, value);
444}
445
446Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
447 switch (bit_size) {
448 case 8:
449 return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
450 case 16:
451 return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
452 case 32:
453 return Inst(Opcode::LoadSharedU32, offset);
454 case 64:
455 return Inst(Opcode::LoadSharedU64, offset);
456 case 128:
457 return Inst(Opcode::LoadSharedU128, offset);
458 }
459 throw InvalidArgument("Invalid bit size {}", bit_size);
460}
461
462void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
463 switch (bit_size) {
464 case 8:
465 Inst(Opcode::WriteSharedU8, offset, value);
466 break;
467 case 16:
468 Inst(Opcode::WriteSharedU16, offset, value);
469 break;
470 case 32:
471 Inst(Opcode::WriteSharedU32, offset, value);
472 break;
473 case 64:
474 Inst(Opcode::WriteSharedU64, offset, value);
475 break;
476 case 128:
477 Inst(Opcode::WriteSharedU128, offset, value);
478 break;
479 default:
480 throw InvalidArgument("Invalid bit size {}", bit_size);
481 }
482}
483
484U1 IREmitter::GetZeroFromOp(const Value& op) {
485 return Inst<U1>(Opcode::GetZeroFromOp, op);
486}
487
488U1 IREmitter::GetSignFromOp(const Value& op) {
489 return Inst<U1>(Opcode::GetSignFromOp, op);
490}
491
492U1 IREmitter::GetCarryFromOp(const Value& op) {
493 return Inst<U1>(Opcode::GetCarryFromOp, op);
494}
495
496U1 IREmitter::GetOverflowFromOp(const Value& op) {
497 return Inst<U1>(Opcode::GetOverflowFromOp, op);
498}
499
500U1 IREmitter::GetSparseFromOp(const Value& op) {
501 return Inst<U1>(Opcode::GetSparseFromOp, op);
502}
503
504U1 IREmitter::GetInBoundsFromOp(const Value& op) {
505 return Inst<U1>(Opcode::GetInBoundsFromOp, op);
506}
507
508F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
509 if (a.Type() != b.Type()) {
510 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
511 }
512 switch (a.Type()) {
513 case Type::F16:
514 return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
515 case Type::F32:
516 return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
517 case Type::F64:
518 return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
519 default:
520 ThrowInvalidType(a.Type());
521 }
522}
523
524Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
525 if (e1.Type() != e2.Type()) {
526 throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
527 }
528 switch (e1.Type()) {
529 case Type::U32:
530 return Inst(Opcode::CompositeConstructU32x2, e1, e2);
531 case Type::F16:
532 return Inst(Opcode::CompositeConstructF16x2, e1, e2);
533 case Type::F32:
534 return Inst(Opcode::CompositeConstructF32x2, e1, e2);
535 case Type::F64:
536 return Inst(Opcode::CompositeConstructF64x2, e1, e2);
537 default:
538 ThrowInvalidType(e1.Type());
539 }
540}
541
542Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
543 if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
544 throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
545 }
546 switch (e1.Type()) {
547 case Type::U32:
548 return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
549 case Type::F16:
550 return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
551 case Type::F32:
552 return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
553 case Type::F64:
554 return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
555 default:
556 ThrowInvalidType(e1.Type());
557 }
558}
559
560Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
561 const Value& e4) {
562 if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
563 throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
564 e3.Type(), e4.Type());
565 }
566 switch (e1.Type()) {
567 case Type::U32:
568 return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
569 case Type::F16:
570 return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
571 case Type::F32:
572 return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
573 case Type::F64:
574 return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
575 default:
576 ThrowInvalidType(e1.Type());
577 }
578}
579
580Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
581 const auto read{[&](Opcode opcode, size_t limit) -> Value {
582 if (element >= limit) {
583 throw InvalidArgument("Out of bounds element {}", element);
584 }
585 return Inst(opcode, vector, Value{static_cast<u32>(element)});
586 }};
587 switch (vector.Type()) {
588 case Type::U32x2:
589 return read(Opcode::CompositeExtractU32x2, 2);
590 case Type::U32x3:
591 return read(Opcode::CompositeExtractU32x3, 3);
592 case Type::U32x4:
593 return read(Opcode::CompositeExtractU32x4, 4);
594 case Type::F16x2:
595 return read(Opcode::CompositeExtractF16x2, 2);
596 case Type::F16x3:
597 return read(Opcode::CompositeExtractF16x3, 3);
598 case Type::F16x4:
599 return read(Opcode::CompositeExtractF16x4, 4);
600 case Type::F32x2:
601 return read(Opcode::CompositeExtractF32x2, 2);
602 case Type::F32x3:
603 return read(Opcode::CompositeExtractF32x3, 3);
604 case Type::F32x4:
605 return read(Opcode::CompositeExtractF32x4, 4);
606 case Type::F64x2:
607 return read(Opcode::CompositeExtractF64x2, 2);
608 case Type::F64x3:
609 return read(Opcode::CompositeExtractF64x3, 3);
610 case Type::F64x4:
611 return read(Opcode::CompositeExtractF64x4, 4);
612 default:
613 ThrowInvalidType(vector.Type());
614 }
615}
616
617Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
618 const auto insert{[&](Opcode opcode, size_t limit) {
619 if (element >= limit) {
620 throw InvalidArgument("Out of bounds element {}", element);
621 }
622 return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
623 }};
624 switch (vector.Type()) {
625 case Type::U32x2:
626 return insert(Opcode::CompositeInsertU32x2, 2);
627 case Type::U32x3:
628 return insert(Opcode::CompositeInsertU32x3, 3);
629 case Type::U32x4:
630 return insert(Opcode::CompositeInsertU32x4, 4);
631 case Type::F16x2:
632 return insert(Opcode::CompositeInsertF16x2, 2);
633 case Type::F16x3:
634 return insert(Opcode::CompositeInsertF16x3, 3);
635 case Type::F16x4:
636 return insert(Opcode::CompositeInsertF16x4, 4);
637 case Type::F32x2:
638 return insert(Opcode::CompositeInsertF32x2, 2);
639 case Type::F32x3:
640 return insert(Opcode::CompositeInsertF32x3, 3);
641 case Type::F32x4:
642 return insert(Opcode::CompositeInsertF32x4, 4);
643 case Type::F64x2:
644 return insert(Opcode::CompositeInsertF64x2, 2);
645 case Type::F64x3:
646 return insert(Opcode::CompositeInsertF64x3, 3);
647 case Type::F64x4:
648 return insert(Opcode::CompositeInsertF64x4, 4);
649 default:
650 ThrowInvalidType(vector.Type());
651 }
652}
653
654Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
655 if (true_value.Type() != false_value.Type()) {
656 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
657 }
658 switch (true_value.Type()) {
659 case Type::U1:
660 return Inst(Opcode::SelectU1, condition, true_value, false_value);
661 case Type::U8:
662 return Inst(Opcode::SelectU8, condition, true_value, false_value);
663 case Type::U16:
664 return Inst(Opcode::SelectU16, condition, true_value, false_value);
665 case Type::U32:
666 return Inst(Opcode::SelectU32, condition, true_value, false_value);
667 case Type::U64:
668 return Inst(Opcode::SelectU64, condition, true_value, false_value);
669 case Type::F32:
670 return Inst(Opcode::SelectF32, condition, true_value, false_value);
671 case Type::F64:
672 return Inst(Opcode::SelectF64, condition, true_value, false_value);
673 default:
674 throw InvalidArgument("Invalid type {}", true_value.Type());
675 }
676}
677
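// Explicit BitCast specializations that reinterpret the bits between integer and
// floating-point values of the same width.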
678template <>
679IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
680 return Inst<IR::U32>(Opcode::BitCastU32F32, value);
681}
682
683template <>
684IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
685 return Inst<IR::F32>(Opcode::BitCastF32U32, value);
686}
687
688template <>
689IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
690 return Inst<IR::U16>(Opcode::BitCastU16F16, value);
691}
692
693template <>
694IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
695 return Inst<IR::F16>(Opcode::BitCastF16U16, value);
696}
697
698template <>
699IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
700 return Inst<IR::U64>(Opcode::BitCastU64F64, value);
701}
702
703template <>
704IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
705 return Inst<IR::F64>(Opcode::BitCastF64U64, value);
706}
707
708U64 IREmitter::PackUint2x32(const Value& vector) {
709 return Inst<U64>(Opcode::PackUint2x32, vector);
710}
711
712Value IREmitter::UnpackUint2x32(const U64& value) {
713 return Inst<Value>(Opcode::UnpackUint2x32, value);
714}
715
716U32 IREmitter::PackFloat2x16(const Value& vector) {
717 return Inst<U32>(Opcode::PackFloat2x16, vector);
718}
719
720Value IREmitter::UnpackFloat2x16(const U32& value) {
721 return Inst(Opcode::UnpackFloat2x16, value);
722}
723
724U32 IREmitter::PackHalf2x16(const Value& vector) {
725 return Inst<U32>(Opcode::PackHalf2x16, vector);
726}
727
728Value IREmitter::UnpackHalf2x16(const U32& value) {
729 return Inst(Opcode::UnpackHalf2x16, value);
730}
731
732F64 IREmitter::PackDouble2x32(const Value& vector) {
733 return Inst<F64>(Opcode::PackDouble2x32, vector);
734}
735
736Value IREmitter::UnpackDouble2x32(const F64& value) {
737 return Inst<Value>(Opcode::UnpackDouble2x32, value);
738}
739
740F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
741 if (a.Type() != b.Type()) {
742 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
743 }
744 switch (a.Type()) {
745 case Type::F16:
746 return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
747 case Type::F32:
748 return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
749 case Type::F64:
750 return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
751 default:
752 ThrowInvalidType(a.Type());
753 }
754}
755
756F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
757 FpControl control) {
758 if (a.Type() != b.Type() || a.Type() != c.Type()) {
759 throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
760 }
761 switch (a.Type()) {
762 case Type::F16:
763 return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
764 case Type::F32:
765 return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
766 case Type::F64:
767 return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
768 default:
769 ThrowInvalidType(a.Type());
770 }
771}
772
773F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
774 switch (value.Type()) {
775 case Type::F16:
776 return Inst<F16>(Opcode::FPAbs16, value);
777 case Type::F32:
778 return Inst<F32>(Opcode::FPAbs32, value);
779 case Type::F64:
780 return Inst<F64>(Opcode::FPAbs64, value);
781 default:
782 ThrowInvalidType(value.Type());
783 }
784}
785
786F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
787 switch (value.Type()) {
788 case Type::F16:
789 return Inst<F16>(Opcode::FPNeg16, value);
790 case Type::F32:
791 return Inst<F32>(Opcode::FPNeg32, value);
792 case Type::F64:
793 return Inst<F64>(Opcode::FPNeg64, value);
794 default:
795 ThrowInvalidType(value.Type());
796 }
797}
798
799F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
800 F16F32F64 result{value};
801 if (abs) {
802 result = FPAbs(result);
803 }
804 if (neg) {
805 result = FPNeg(result);
806 }
807 return result;
808}
809
810F32 IREmitter::FPCos(const F32& value) {
811 return Inst<F32>(Opcode::FPCos, value);
812}
813
814F32 IREmitter::FPSin(const F32& value) {
815 return Inst<F32>(Opcode::FPSin, value);
816}
817
818F32 IREmitter::FPExp2(const F32& value) {
819 return Inst<F32>(Opcode::FPExp2, value);
820}
821
822F32 IREmitter::FPLog2(const F32& value) {
823 return Inst<F32>(Opcode::FPLog2, value);
824}
825
826F32F64 IREmitter::FPRecip(const F32F64& value) {
827 switch (value.Type()) {
828 case Type::F32:
829 return Inst<F32>(Opcode::FPRecip32, value);
830 case Type::F64:
831 return Inst<F64>(Opcode::FPRecip64, value);
832 default:
833 ThrowInvalidType(value.Type());
834 }
835}
836
837F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
838 switch (value.Type()) {
839 case Type::F32:
840 return Inst<F32>(Opcode::FPRecipSqrt32, value);
841 case Type::F64:
842 return Inst<F64>(Opcode::FPRecipSqrt64, value);
843 default:
844 ThrowInvalidType(value.Type());
845 }
846}
847
848F32 IREmitter::FPSqrt(const F32& value) {
849 return Inst<F32>(Opcode::FPSqrt, value);
850}
851
852F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
853 switch (value.Type()) {
854 case Type::F16:
855 return Inst<F16>(Opcode::FPSaturate16, value);
856 case Type::F32:
857 return Inst<F32>(Opcode::FPSaturate32, value);
858 case Type::F64:
859 return Inst<F64>(Opcode::FPSaturate64, value);
860 default:
861 ThrowInvalidType(value.Type());
862 }
863}
864
865F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
866 const F16F32F64& max_value) {
867 if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
868 throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
869 max_value.Type());
870 }
871 switch (value.Type()) {
872 case Type::F16:
873 return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
874 case Type::F32:
875 return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
876 case Type::F64:
877 return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
878 default:
879 ThrowInvalidType(value.Type());
880 }
881}
882
883F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
884 switch (value.Type()) {
885 case Type::F16:
886 return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value);
887 case Type::F32:
888 return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value);
889 case Type::F64:
890 return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value);
891 default:
892 ThrowInvalidType(value.Type());
893 }
894}
895
896F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) {
897 switch (value.Type()) {
898 case Type::F16:
899 return Inst<F16>(Opcode::FPFloor16, Flags{control}, value);
900 case Type::F32:
901 return Inst<F32>(Opcode::FPFloor32, Flags{control}, value);
902 case Type::F64:
903 return Inst<F64>(Opcode::FPFloor64, Flags{control}, value);
904 default:
905 ThrowInvalidType(value.Type());
906 }
907}
908
909F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) {
910 switch (value.Type()) {
911 case Type::F16:
912 return Inst<F16>(Opcode::FPCeil16, Flags{control}, value);
913 case Type::F32:
914 return Inst<F32>(Opcode::FPCeil32, Flags{control}, value);
915 case Type::F64:
916 return Inst<F64>(Opcode::FPCeil64, Flags{control}, value);
917 default:
918 ThrowInvalidType(value.Type());
919 }
920}
921
922F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
923 switch (value.Type()) {
924 case Type::F16:
925 return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value);
926 case Type::F32:
927 return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value);
928 case Type::F64:
929 return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value);
930 default:
931 ThrowInvalidType(value.Type());
932 }
933}
934
935U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) {
936 if (lhs.Type() != rhs.Type()) {
937 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
938 }
939 switch (lhs.Type()) {
940 case Type::F16:
941 return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control},
942 lhs, rhs);
943 case Type::F32:
944 return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control},
945 lhs, rhs);
946 case Type::F64:
947 return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control},
948 lhs, rhs);
949 default:
950 ThrowInvalidType(lhs.Type());
951 }
952}
953
954U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
955 bool ordered) {
956 if (lhs.Type() != rhs.Type()) {
957 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
958 }
959 switch (lhs.Type()) {
960 case Type::F16:
961 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16,
962 Flags{control}, lhs, rhs);
963 case Type::F32:
964 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32,
965 Flags{control}, lhs, rhs);
966 case Type::F64:
967 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64,
968 Flags{control}, lhs, rhs);
969 default:
970 ThrowInvalidType(lhs.Type());
971 }
972}
973
974U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
975 bool ordered) {
976 if (lhs.Type() != rhs.Type()) {
977 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
978 }
979 switch (lhs.Type()) {
980 case Type::F16:
981 return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16,
982 Flags{control}, lhs, rhs);
983 case Type::F32:
984 return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32,
985 Flags{control}, lhs, rhs);
986 case Type::F64:
987 return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64,
988 Flags{control}, lhs, rhs);
989 default:
990 ThrowInvalidType(lhs.Type());
991 }
992}
993
994U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
995 bool ordered) {
996 if (lhs.Type() != rhs.Type()) {
997 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
998 }
999 switch (lhs.Type()) {
1000 case Type::F16:
1001 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16,
1002 Flags{control}, lhs, rhs);
1003 case Type::F32:
1004 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32,
1005 Flags{control}, lhs, rhs);
1006 case Type::F64:
1007 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64,
1008 Flags{control}, lhs, rhs);
1009 default:
1010 ThrowInvalidType(lhs.Type());
1011 }
1012}
1013
1014U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
1015 bool ordered) {
1016 if (lhs.Type() != rhs.Type()) {
1017 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1018 }
1019 switch (lhs.Type()) {
1020 case Type::F16:
1021 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
1022 Flags{control}, lhs, rhs);
1023 case Type::F32:
1024 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
1025 Flags{control}, lhs, rhs);
1026 case Type::F64:
1027 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
1028 Flags{control}, lhs, rhs);
1029 default:
1030 ThrowInvalidType(lhs.Type());
1031 }
1032}
1033
1034U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
1035 bool ordered) {
1036 if (lhs.Type() != rhs.Type()) {
1037 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1038 }
1039 switch (lhs.Type()) {
1040 case Type::F16:
1041 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
1042 : Opcode::FPUnordGreaterThanEqual16,
1043 Flags{control}, lhs, rhs);
1044 case Type::F32:
1045 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
1046 : Opcode::FPUnordGreaterThanEqual32,
1047 Flags{control}, lhs, rhs);
1048 case Type::F64:
1049 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
1050 : Opcode::FPUnordGreaterThanEqual64,
1051 Flags{control}, lhs, rhs);
1052 default:
1053 ThrowInvalidType(lhs.Type());
1054 }
1055}
1056
1057U1 IREmitter::FPIsNan(const F16F32F64& value) {
1058 switch (value.Type()) {
1059 case Type::F16:
1060 return Inst<U1>(Opcode::FPIsNan16, value);
1061 case Type::F32:
1062 return Inst<U1>(Opcode::FPIsNan32, value);
1063 case Type::F64:
1064 return Inst<U1>(Opcode::FPIsNan64, value);
1065 default:
1066 ThrowInvalidType(value.Type());
1067 }
1068}
1069
1070U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
1071 if (lhs.Type() != rhs.Type()) {
1072 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1073 }
1074 return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
1075}
1076
1077U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
1078 if (lhs.Type() != rhs.Type()) {
1079 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1080 }
1081 return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
1082}
1083
1084F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) {
1085 if (lhs.Type() != rhs.Type()) {
1086 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1087 }
1088 switch (lhs.Type()) {
1089 case Type::F32:
1090 return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs);
1091 case Type::F64:
1092 return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs);
1093 default:
1094 ThrowInvalidType(lhs.Type());
1095 }
1096}
1097
1098F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) {
1099 if (lhs.Type() != rhs.Type()) {
1100 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1101 }
1102 switch (lhs.Type()) {
1103 case Type::F32:
1104 return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs);
1105 case Type::F64:
1106 return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs);
1107 default:
1108 ThrowInvalidType(lhs.Type());
1109 }
1110}
1111
1112U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
1113 if (a.Type() != b.Type()) {
1114 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
1115 }
1116 switch (a.Type()) {
1117 case Type::U32:
1118 return Inst<U32>(Opcode::IAdd32, a, b);
1119 case Type::U64:
1120 return Inst<U64>(Opcode::IAdd64, a, b);
1121 default:
1122 ThrowInvalidType(a.Type());
1123 }
1124}
1125
1126U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
1127 if (a.Type() != b.Type()) {
1128 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
1129 }
1130 switch (a.Type()) {
1131 case Type::U32:
1132 return Inst<U32>(Opcode::ISub32, a, b);
1133 case Type::U64:
1134 return Inst<U64>(Opcode::ISub64, a, b);
1135 default:
1136 ThrowInvalidType(a.Type());
1137 }
1138}
1139
1140U32 IREmitter::IMul(const U32& a, const U32& b) {
1141 return Inst<U32>(Opcode::IMul32, a, b);
1142}
1143
1144U32U64 IREmitter::INeg(const U32U64& value) {
1145 switch (value.Type()) {
1146 case Type::U32:
1147 return Inst<U32>(Opcode::INeg32, value);
1148 case Type::U64:
1149 return Inst<U64>(Opcode::INeg64, value);
1150 default:
1151 ThrowInvalidType(value.Type());
1152 }
1153}
1154
1155U32 IREmitter::IAbs(const U32& value) {
1156 return Inst<U32>(Opcode::IAbs32, value);
1157}
1158
1159U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
1160 switch (base.Type()) {
1161 case Type::U32:
1162 return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
1163 case Type::U64:
1164 return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift);
1165 default:
1166 ThrowInvalidType(base.Type());
1167 }
1168}
1169
1170U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
1171 switch (base.Type()) {
1172 case Type::U32:
1173 return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
1174 case Type::U64:
1175 return Inst<U64>(Opcode::ShiftRightLogical64, base, shift);
1176 default:
1177 ThrowInvalidType(base.Type());
1178 }
1179}
1180
1181U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
1182 switch (base.Type()) {
1183 case Type::U32:
1184 return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
1185 case Type::U64:
1186 return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift);
1187 default:
1188 ThrowInvalidType(base.Type());
1189 }
1190}
1191
1192U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
1193 return Inst<U32>(Opcode::BitwiseAnd32, a, b);
1194}
1195
1196U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
1197 return Inst<U32>(Opcode::BitwiseOr32, a, b);
1198}
1199
1200U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
1201 return Inst<U32>(Opcode::BitwiseXor32, a, b);
1202}
1203
1204U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
1205 const U32& count) {
1206 return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
1207}
1208
1209U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
1210 bool is_signed) {
1211 return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
1212 count);
1213}
1214
1215U32 IREmitter::BitReverse(const U32& value) {
1216 return Inst<U32>(Opcode::BitReverse32, value);
1217}
1218
1219U32 IREmitter::BitCount(const U32& value) {
1220 return Inst<U32>(Opcode::BitCount32, value);
1221}
1222
1223U32 IREmitter::BitwiseNot(const U32& value) {
1224 return Inst<U32>(Opcode::BitwiseNot32, value);
1225}
1226
1227U32 IREmitter::FindSMsb(const U32& value) {
1228 return Inst<U32>(Opcode::FindSMsb32, value);
1229}
1230
1231U32 IREmitter::FindUMsb(const U32& value) {
1232 return Inst<U32>(Opcode::FindUMsb32, value);
1233}
1234
1235U32 IREmitter::SMin(const U32& a, const U32& b) {
1236 return Inst<U32>(Opcode::SMin32, a, b);
1237}
1238
1239U32 IREmitter::UMin(const U32& a, const U32& b) {
1240 return Inst<U32>(Opcode::UMin32, a, b);
1241}
1242
1243U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) {
1244 return is_signed ? SMin(a, b) : UMin(a, b);
1245}
1246
1247U32 IREmitter::SMax(const U32& a, const U32& b) {
1248 return Inst<U32>(Opcode::SMax32, a, b);
1249}
1250
1251U32 IREmitter::UMax(const U32& a, const U32& b) {
1252 return Inst<U32>(Opcode::UMax32, a, b);
1253}
1254
1255U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
1256 return is_signed ? SMax(a, b) : UMax(a, b);
1257}
1258
1259U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) {
1260 return Inst<U32>(Opcode::SClamp32, value, min, max);
1261}
1262
1263U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
1264 return Inst<U32>(Opcode::UClamp32, value, min, max);
1265}
1266
1267U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
1268 return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
1269}
1270
1271U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
1272 if (lhs.Type() != rhs.Type()) {
1273 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1274 }
1275 switch (lhs.Type()) {
1276 case Type::U32:
1277 return Inst<U1>(Opcode::IEqual, lhs, rhs);
1278 case Type::U64: {
1279 // Manually compare the unpacked values
1280 const Value lhs_vector{UnpackUint2x32(lhs)};
1281 const Value rhs_vector{UnpackUint2x32(rhs)};
1282 return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
1283 IR::U32{CompositeExtract(rhs_vector, 0)}),
1284 IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
1285 IR::U32{CompositeExtract(rhs_vector, 1)}));
1286 }
1287 default:
1288 ThrowInvalidType(lhs.Type());
1289 }
1290}
1291
1292U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
1293 return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
1294}
1295
1296U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
1297 return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
1298}
1299
1300U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
1301 return Inst<U1>(Opcode::INotEqual, lhs, rhs);
1302}
1303
1304U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
1305 return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
1306}
1307
1308U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
1309 return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
1310}
1311
1312U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
1313 return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
1314}
1315
1316U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
1317 return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
1318}
1319
1320U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
1321 return is_signed ? SharedAtomicSMin(pointer_offset, value)
1322 : SharedAtomicUMin(pointer_offset, value);
1323}
1324
1325U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
1326 return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
1327}
1328
1329U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
1330 return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
1331}
1332
1333U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
1334 return is_signed ? SharedAtomicSMax(pointer_offset, value)
1335 : SharedAtomicUMax(pointer_offset, value);
1336}
1337
1338U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
1339 return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
1340}
1341
1342U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
1343 return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
1344}
1345
1346U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
1347 return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
1348}
1349
1350U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
1351 return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
1352}
1353
1354U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
1355 return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
1356}
1357
1358U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
1359 switch (value.Type()) {
1360 case Type::U32:
1361 return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
1362 case Type::U64:
1363 return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
1364 default:
1365 ThrowInvalidType(value.Type());
1366 }
1367}
1368
1369U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
1370 switch (value.Type()) {
1371 case Type::U32:
1372 return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
1373 case Type::U64:
1374 return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
1375 default:
1376 ThrowInvalidType(value.Type());
1377 }
1378}
1379
1380U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
1381 switch (value.Type()) {
1382 case Type::U32:
1383 return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
1384 case Type::U64:
1385 return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
1386 default:
1387 ThrowInvalidType(value.Type());
1388 }
1389}
1390
1391U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
1392 switch (value.Type()) {
1393 case Type::U32:
1394 return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
1395 case Type::U64:
1396 return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
1397 default:
1398 ThrowInvalidType(value.Type());
1399 }
1400}
1401
1402U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1403 return is_signed ? GlobalAtomicSMin(pointer_offset, value)
1404 : GlobalAtomicUMin(pointer_offset, value);
1405}
1406
1407U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
1408 switch (value.Type()) {
1409 case Type::U32:
1410 return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
1411 case Type::U64:
1412 return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
1413 default:
1414 ThrowInvalidType(value.Type());
1415 }
1416}
1417
1418U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
1419 switch (value.Type()) {
1420 case Type::U32:
1421 return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
1422 case Type::U64:
1423 return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
1424 default:
1425 ThrowInvalidType(value.Type());
1426 }
1427}
1428
1429U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1430 return is_signed ? GlobalAtomicSMax(pointer_offset, value)
1431 : GlobalAtomicUMax(pointer_offset, value);
1432}
1433
1434U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
1435 return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
1436}
1437
1438U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
1439 return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
1440}
1441
1442U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
1443 switch (value.Type()) {
1444 case Type::U32:
1445 return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
1446 case Type::U64:
1447 return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
1448 default:
1449 ThrowInvalidType(value.Type());
1450 }
1451}
1452
1453U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
1454 switch (value.Type()) {
1455 case Type::U32:
1456 return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
1457 case Type::U64:
1458 return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
1459 default:
1460 ThrowInvalidType(value.Type());
1461 }
1462}
1463
1464U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
1465 switch (value.Type()) {
1466 case Type::U32:
1467 return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
1468 case Type::U64:
1469 return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
1470 default:
1471 ThrowInvalidType(value.Type());
1472 }
1473}
1474
1475U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
1476 switch (value.Type()) {
1477 case Type::U32:
1478 return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
1479 case Type::U64:
1480 return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
1481 default:
1482 ThrowInvalidType(value.Type());
1483 }
1484}
1485
1486F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
1487 const FpControl control) {
1488 return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
1489}
1490
1491Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
1492 const FpControl control) {
1493 return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
1494}
1495
1496Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
1497 const FpControl control) {
1498 return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
1499}
1500
1501Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
1502 const FpControl control) {
1503 return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
1504}
1505
1506U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
1507 return Inst<U1>(Opcode::LogicalOr, a, b);
1508}
1509
1510U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
1511 return Inst<U1>(Opcode::LogicalAnd, a, b);
1512}
1513
1514U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
1515 return Inst<U1>(Opcode::LogicalXor, a, b);
1516}
1517
1518U1 IREmitter::LogicalNot(const U1& value) {
1519 return Inst<U1>(Opcode::LogicalNot, value);
1520}
1521
1522U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
1523 switch (bitsize) {
1524 case 16:
1525 switch (value.Type()) {
1526 case Type::F16:
1527 return Inst<U32>(Opcode::ConvertS16F16, value);
1528 case Type::F32:
1529 return Inst<U32>(Opcode::ConvertS16F32, value);
1530 case Type::F64:
1531 return Inst<U32>(Opcode::ConvertS16F64, value);
1532 default:
1533 ThrowInvalidType(value.Type());
1534 }
1535 case 32:
1536 switch (value.Type()) {
1537 case Type::F16:
1538 return Inst<U32>(Opcode::ConvertS32F16, value);
1539 case Type::F32:
1540 return Inst<U32>(Opcode::ConvertS32F32, value);
1541 case Type::F64:
1542 return Inst<U32>(Opcode::ConvertS32F64, value);
1543 default:
1544 ThrowInvalidType(value.Type());
1545 }
1546 case 64:
1547 switch (value.Type()) {
1548 case Type::F16:
1549 return Inst<U64>(Opcode::ConvertS64F16, value);
1550 case Type::F32:
1551 return Inst<U64>(Opcode::ConvertS64F32, value);
1552 case Type::F64:
1553 return Inst<U64>(Opcode::ConvertS64F64, value);
1554 default:
1555 ThrowInvalidType(value.Type());
1556 }
1557 default:
1558 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1559 }
1560}
1561
1562U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
1563 switch (bitsize) {
1564 case 16:
1565 switch (value.Type()) {
1566 case Type::F16:
1567 return Inst<U32>(Opcode::ConvertU16F16, value);
1568 case Type::F32:
1569 return Inst<U32>(Opcode::ConvertU16F32, value);
1570 case Type::F64:
1571 return Inst<U32>(Opcode::ConvertU16F64, value);
1572 default:
1573 ThrowInvalidType(value.Type());
1574 }
1575 case 32:
1576 switch (value.Type()) {
1577 case Type::F16:
1578 return Inst<U32>(Opcode::ConvertU32F16, value);
1579 case Type::F32:
1580 return Inst<U32>(Opcode::ConvertU32F32, value);
1581 case Type::F64:
1582 return Inst<U32>(Opcode::ConvertU32F64, value);
1583 default:
1584 ThrowInvalidType(value.Type());
1585 }
1586 case 64:
1587 switch (value.Type()) {
1588 case Type::F16:
1589 return Inst<U64>(Opcode::ConvertU64F16, value);
1590 case Type::F32:
1591 return Inst<U64>(Opcode::ConvertU64F32, value);
1592 case Type::F64:
1593 return Inst<U64>(Opcode::ConvertU64F64, value);
1594 default:
1595 ThrowInvalidType(value.Type());
1596 }
1597 default:
1598 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1599 }
1600}
1601
1602U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
1603 return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value);
1604}
1605
1606F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
1607 FpControl control) {
1608 switch (dest_bitsize) {
1609 case 16:
1610 switch (src_bitsize) {
1611 case 8:
1612 return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value);
1613 case 16:
1614 return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value);
1615 case 32:
1616 return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value);
1617 case 64:
1618 return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value);
1619 }
1620 break;
1621 case 32:
1622 switch (src_bitsize) {
1623 case 8:
1624 return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value);
1625 case 16:
1626 return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value);
1627 case 32:
1628 return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value);
1629 case 64:
1630 return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value);
1631 }
1632 break;
1633 case 64:
1634 switch (src_bitsize) {
1635 case 8:
1636 return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value);
1637 case 16:
1638 return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value);
1639 case 32:
1640 return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value);
1641 case 64:
1642 return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value);
1643 }
1644 break;
1645 }
1646 throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
1647}
1648
1649F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
1650 FpControl control) {
1651 switch (dest_bitsize) {
1652 case 16:
1653 switch (src_bitsize) {
1654 case 8:
1655 return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value);
1656 case 16:
1657 return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value);
1658 case 32:
1659 return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value);
1660 case 64:
1661 return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value);
1662 }
1663 break;
1664 case 32:
1665 switch (src_bitsize) {
1666 case 8:
1667 return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value);
1668 case 16:
1669 return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value);
1670 case 32:
1671 return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value);
1672 case 64:
1673 return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value);
1674 }
1675 break;
1676 case 64:
1677 switch (src_bitsize) {
1678 case 8:
1679 return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value);
1680 case 16:
1681 return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value);
1682 case 32:
1683 return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value);
1684 case 64:
1685 return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value);
1686 }
1687 break;
1688 }
1689 throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
1690}
1691
1692F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
1693 const Value& value, FpControl control) {
1694 return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control)
1695 : ConvertUToF(dest_bitsize, src_bitsize, value, control);
1696}
1697
1698U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
1699 switch (result_bitsize) {
1700 case 32:
1701 switch (value.Type()) {
1702 case Type::U32:
1703 // Nothing to do
1704 return value;
1705 case Type::U64:
1706 return Inst<U32>(Opcode::ConvertU32U64, value);
1707 default:
1708 break;
1709 }
1710 break;
1711 case 64:
1712 switch (value.Type()) {
1713 case Type::U32:
1714 return Inst<U64>(Opcode::ConvertU64U32, value);
1715 case Type::U64:
1716 // Nothing to do
1717 return value;
1718 default:
1719 break;
1720 }
1721 }
1722 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1723}
1724
1725F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) {
1726 switch (result_bitsize) {
1727 case 16:
1728 switch (value.Type()) {
1729 case Type::F16:
1730 // Nothing to do
1731 return value;
1732 case Type::F32:
1733 return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value);
1734 case Type::F64:
1735 throw LogicError("Illegal conversion from F64 to F16");
1736 default:
1737 break;
1738 }
1739 break;
1740 case 32:
1741 switch (value.Type()) {
1742 case Type::F16:
1743 return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value);
1744 case Type::F32:
1745 // Nothing to do
1746 return value;
1747 case Type::F64:
1748 return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value);
1749 default:
1750 break;
1751 }
1752 break;
1753 case 64:
1754 switch (value.Type()) {
1755 case Type::F16:
1756 throw LogicError("Illegal conversion from F16 to F64");
1757 case Type::F32:
1758 return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value);
1759 case Type::F64:
1760 // Nothing to do
1761 return value;
1762 default:
1763 break;
1764 }
1765 break;
1766 }
1767 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1768}
1769
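// Texture and image operations select the Bound opcode when the handle is an immediate
// and the Bindless opcode otherwise.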
1770Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
1771 const Value& offset, const F32& lod_clamp,
1772 TextureInstInfo info) {
1773 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1774 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
1775 : Opcode::BindlessImageSampleImplicitLod};
1776 return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
1777}
1778
1779Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
1780 const Value& offset, TextureInstInfo info) {
1781 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
1782 : Opcode::BindlessImageSampleExplicitLod};
1783 return Inst(op, Flags{info}, handle, coords, lod, offset);
1784}
1785
1786F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
1787 const F32& bias, const Value& offset,
1788 const F32& lod_clamp, TextureInstInfo info) {
1789 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1790 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
1791 : Opcode::BindlessImageSampleDrefImplicitLod};
1792 return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
1793}
1794
1795F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
1796 const F32& lod, const Value& offset,
1797 TextureInstInfo info) {
1798 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
1799 : Opcode::BindlessImageSampleDrefExplicitLod};
1800 return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset);
1801}
1802
1803Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
1804 const Value& offset2, TextureInstInfo info) {
1805 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather};
1806 return Inst(op, Flags{info}, handle, coords, offset, offset2);
1807}
1808
1809Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
1810 const Value& offset2, const F32& dref, TextureInstInfo info) {
1811 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref
1812 : Opcode::BindlessImageGatherDref};
1813 return Inst(op, Flags{info}, handle, coords, offset, offset2, dref);
1814}
1815
1816Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
1817 const U32& lod, const U32& multisampling, TextureInstInfo info) {
1818 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch};
1819 return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
1820}
1821
1822Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
1823 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
1824 : Opcode::BindlessImageQueryDimensions};
1825 return Inst(op, handle, lod);
1826}
1827
1828Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
1829 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod
1830 : Opcode::BindlessImageQueryLod};
1831 return Inst(op, Flags{info}, handle, coords);
1832}
1833
1834Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
1835 const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
1836 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
1837 : Opcode::BindlessImageGradient};
1838 return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
1839}
1840
1841Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
1842 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead};
1843 return Inst(op, Flags{info}, handle, coords);
1844}
1845
1846void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
1847 TextureInstInfo info) {
1848 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
1849 Inst(op, Flags{info}, handle, coords, color);
1850}
1851
1852Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
1853 TextureInstInfo info) {
1854 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32
1855 : Opcode::BindlessImageAtomicIAdd32};
1856 return Inst(op, Flags{info}, handle, coords, value);
1857}
1858
1859Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
1860 TextureInstInfo info) {
1861 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32
1862 : Opcode::BindlessImageAtomicSMin32};
1863 return Inst(op, Flags{info}, handle, coords, value);
1864}
1865
1866Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
1867 TextureInstInfo info) {
1868 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32
1869 : Opcode::BindlessImageAtomicUMin32};
1870 return Inst(op, Flags{info}, handle, coords, value);
1871}
1872
1873Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
1874 bool is_signed, TextureInstInfo info) {
1875 return is_signed ? ImageAtomicSMin(handle, coords, value, info)
1876 : ImageAtomicUMin(handle, coords, value, info);
1877}
1878
1879Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
1880 TextureInstInfo info) {
1881 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32
1882 : Opcode::BindlessImageAtomicSMax32};
1883 return Inst(op, Flags{info}, handle, coords, value);
1884}
1885
1886Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
1887 TextureInstInfo info) {
1888 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32
1889 : Opcode::BindlessImageAtomicUMax32};
1890 return Inst(op, Flags{info}, handle, coords, value);
1891}
1892
1893Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
1894 bool is_signed, TextureInstInfo info) {
1895 return is_signed ? ImageAtomicSMax(handle, coords, value, info)
1896 : ImageAtomicUMax(handle, coords, value, info);
1897}
1898
1899Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
1900 TextureInstInfo info) {
1901 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32
1902 : Opcode::BindlessImageAtomicInc32};
1903 return Inst(op, Flags{info}, handle, coords, value);
1904}
1905
1906Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
1907 TextureInstInfo info) {
1908 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32
1909 : Opcode::BindlessImageAtomicDec32};
1910 return Inst(op, Flags{info}, handle, coords, value);
1911}
1912
1913Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
1914 TextureInstInfo info) {
1915 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32
1916 : Opcode::BindlessImageAtomicAnd32};
1917 return Inst(op, Flags{info}, handle, coords, value);
1918}
1919
1920Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
1921 TextureInstInfo info) {
1922 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32
1923 : Opcode::BindlessImageAtomicOr32};
1924 return Inst(op, Flags{info}, handle, coords, value);
1925}
1926
1927Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
1928 TextureInstInfo info) {
1929 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32
1930 : Opcode::BindlessImageAtomicXor32};
1931 return Inst(op, Flags{info}, handle, coords, value);
1932}
1933
1934Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
1935 TextureInstInfo info) {
1936 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32
1937 : Opcode::BindlessImageAtomicExchange32};
1938 return Inst(op, Flags{info}, handle, coords, value);
1939}
1940
1941U1 IREmitter::VoteAll(const U1& value) {
1942 return Inst<U1>(Opcode::VoteAll, value);
1943}
1944
1945U1 IREmitter::VoteAny(const U1& value) {
1946 return Inst<U1>(Opcode::VoteAny, value);
1947}
1948
1949U1 IREmitter::VoteEqual(const U1& value) {
1950 return Inst<U1>(Opcode::VoteEqual, value);
1951}
1952
1953U32 IREmitter::SubgroupBallot(const U1& value) {
1954 return Inst<U32>(Opcode::SubgroupBallot, value);
1955}
1956
1957U32 IREmitter::SubgroupEqMask() {
1958 return Inst<U32>(Opcode::SubgroupEqMask);
1959}
1960
1961U32 IREmitter::SubgroupLtMask() {
1962 return Inst<U32>(Opcode::SubgroupLtMask);
1963}
1964
1965U32 IREmitter::SubgroupLeMask() {
1966 return Inst<U32>(Opcode::SubgroupLeMask);
1967}
1968
1969U32 IREmitter::SubgroupGtMask() {
1970 return Inst<U32>(Opcode::SubgroupGtMask);
1971}
1972
1973U32 IREmitter::SubgroupGeMask() {
1974 return Inst<U32>(Opcode::SubgroupGeMask);
1975}
1976
1977U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1978 const IR::U32& seg_mask) {
1979 return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
1980}
1981
1982U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1983 const IR::U32& seg_mask) {
1984 return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask);
1985}
1986
1987U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1988 const IR::U32& seg_mask) {
1989 return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask);
1990}
1991
1992U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1993 const IR::U32& seg_mask) {
1994 return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
1995}
1996
1997F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
1998 return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle);
1999}
2000
2001F32 IREmitter::DPdxFine(const F32& a) {
2002 return Inst<F32>(Opcode::DPdxFine, a);
2003}
2004
2005F32 IREmitter::DPdyFine(const F32& a) {
2006 return Inst<F32>(Opcode::DPdyFine, a);
2007}
2008
2009F32 IREmitter::DPdxCoarse(const F32& a) {
2010 return Inst<F32>(Opcode::DPdxCoarse, a);
2011}
2012
2013F32 IREmitter::DPdyCoarse(const F32& a) {
2014 return Inst<F32>(Opcode::DPdyCoarse, a);
2015}
2016
2017} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
new file mode 100644
index 000000000..53f7b3b06
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -0,0 +1,413 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstring>
8#include <type_traits>
9
10#include "shader_recompiler/frontend/ir/attribute.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/modifiers.h"
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17class IREmitter {
18public:
19 explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
20 explicit IREmitter(Block& block_, Block::iterator insertion_point_)
21 : block{&block_}, insertion_point{insertion_point_} {}
22
23 Block* block;
24
25 [[nodiscard]] U1 Imm1(bool value) const;
26 [[nodiscard]] U8 Imm8(u8 value) const;
27 [[nodiscard]] U16 Imm16(u16 value) const;
28 [[nodiscard]] U32 Imm32(u32 value) const;
29 [[nodiscard]] U32 Imm32(s32 value) const;
30 [[nodiscard]] F32 Imm32(f32 value) const;
31 [[nodiscard]] U64 Imm64(u64 value) const;
32 [[nodiscard]] U64 Imm64(s64 value) const;
33 [[nodiscard]] F64 Imm64(f64 value) const;
34
35 U1 ConditionRef(const U1& value);
36 void Reference(const Value& value);
37
38 void PhiMove(IR::Inst& phi, const Value& value);
39
40 void Prologue();
41 void Epilogue();
42 void DemoteToHelperInvocation();
43 void EmitVertex(const U32& stream);
44 void EndPrimitive(const U32& stream);
45
46 [[nodiscard]] U32 GetReg(IR::Reg reg);
47 void SetReg(IR::Reg reg, const U32& value);
48
49 [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
50 void SetPred(IR::Pred pred, const U1& value);
51
52 [[nodiscard]] U1 GetGotoVariable(u32 id);
53 void SetGotoVariable(u32 id, const U1& value);
54
55 [[nodiscard]] U32 GetIndirectBranchVariable();
56 void SetIndirectBranchVariable(const U32& value);
57
58 [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
59 [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
60 bool is_signed);
61 [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);
62
63 [[nodiscard]] U1 GetZFlag();
64 [[nodiscard]] U1 GetSFlag();
65 [[nodiscard]] U1 GetCFlag();
66 [[nodiscard]] U1 GetOFlag();
67
68 void SetZFlag(const U1& value);
69 void SetSFlag(const U1& value);
70 void SetCFlag(const U1& value);
71 void SetOFlag(const U1& value);
72
73 [[nodiscard]] U1 Condition(IR::Condition cond);
74 [[nodiscard]] U1 GetFlowTestResult(FlowTest test);
75
76 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
77 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
78 void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
79
80 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
81 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
82 void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
83
84 [[nodiscard]] F32 GetPatch(Patch patch);
85 void SetPatch(Patch patch, const F32& value);
86
87 void SetFragColor(u32 index, u32 component, const F32& value);
88 void SetSampleMask(const U32& value);
89 void SetFragDepth(const F32& value);
90
91 [[nodiscard]] U32 WorkgroupIdX();
92 [[nodiscard]] U32 WorkgroupIdY();
93 [[nodiscard]] U32 WorkgroupIdZ();
94
95 [[nodiscard]] Value LocalInvocationId();
96 [[nodiscard]] U32 LocalInvocationIdX();
97 [[nodiscard]] U32 LocalInvocationIdY();
98 [[nodiscard]] U32 LocalInvocationIdZ();
99
100 [[nodiscard]] U32 InvocationId();
101 [[nodiscard]] U32 SampleId();
102 [[nodiscard]] U1 IsHelperInvocation();
103 [[nodiscard]] F32 YDirection();
104
105 [[nodiscard]] U32 LaneId();
106
107 [[nodiscard]] U32 LoadGlobalU8(const U64& address);
108 [[nodiscard]] U32 LoadGlobalS8(const U64& address);
109 [[nodiscard]] U32 LoadGlobalU16(const U64& address);
110 [[nodiscard]] U32 LoadGlobalS16(const U64& address);
111 [[nodiscard]] U32 LoadGlobal32(const U64& address);
112 [[nodiscard]] Value LoadGlobal64(const U64& address);
113 [[nodiscard]] Value LoadGlobal128(const U64& address);
114
115 void WriteGlobalU8(const U64& address, const U32& value);
116 void WriteGlobalS8(const U64& address, const U32& value);
117 void WriteGlobalU16(const U64& address, const U32& value);
118 void WriteGlobalS16(const U64& address, const U32& value);
119 void WriteGlobal32(const U64& address, const U32& value);
120 void WriteGlobal64(const U64& address, const IR::Value& vector);
121 void WriteGlobal128(const U64& address, const IR::Value& vector);
122
123 [[nodiscard]] U32 LoadLocal(const U32& word_offset);
124 void WriteLocal(const U32& word_offset, const U32& value);
125
126 [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
127 void WriteShared(int bit_size, const U32& offset, const Value& value);
128
129 [[nodiscard]] U1 GetZeroFromOp(const Value& op);
130 [[nodiscard]] U1 GetSignFromOp(const Value& op);
131 [[nodiscard]] U1 GetCarryFromOp(const Value& op);
132 [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
133 [[nodiscard]] U1 GetSparseFromOp(const Value& op);
134 [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
135
136 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
137 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
138 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
139 const Value& e4);
140 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
141 [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
142
143 [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
144 const Value& false_value);
145
146 void Barrier();
147 void WorkgroupMemoryBarrier();
148 void DeviceMemoryBarrier();
149
150 template <typename Dest, typename Source>
151 [[nodiscard]] Dest BitCast(const Source& value);
152
153 [[nodiscard]] U64 PackUint2x32(const Value& vector);
154 [[nodiscard]] Value UnpackUint2x32(const U64& value);
155
156 [[nodiscard]] U32 PackFloat2x16(const Value& vector);
157 [[nodiscard]] Value UnpackFloat2x16(const U32& value);
158
159 [[nodiscard]] U32 PackHalf2x16(const Value& vector);
160 [[nodiscard]] Value UnpackHalf2x16(const U32& value);
161
162 [[nodiscard]] F64 PackDouble2x32(const Value& vector);
163 [[nodiscard]] Value UnpackDouble2x32(const F64& value);
164
165 [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
166 [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
167 [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
168 FpControl control = {});
169
170 [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
171 [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
172 [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
173
174 [[nodiscard]] F32 FPCos(const F32& value);
175 [[nodiscard]] F32 FPSin(const F32& value);
176 [[nodiscard]] F32 FPExp2(const F32& value);
177 [[nodiscard]] F32 FPLog2(const F32& value);
178 [[nodiscard]] F32F64 FPRecip(const F32F64& value);
179 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
180 [[nodiscard]] F32 FPSqrt(const F32& value);
181 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
182 [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
183 const F16F32F64& max_value);
184 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
185 [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
186 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
187 [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
188
189 [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
190 bool ordered = true);
191 [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
192 bool ordered = true);
193 [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
194 bool ordered = true);
195 [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
196 FpControl control = {}, bool ordered = true);
197 [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
198 FpControl control = {}, bool ordered = true);
199 [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
200 FpControl control = {}, bool ordered = true);
201 [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
202 [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
203 [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
204 [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
205 [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
206
207 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
208 [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
209 [[nodiscard]] U32 IMul(const U32& a, const U32& b);
210 [[nodiscard]] U32U64 INeg(const U32U64& value);
211 [[nodiscard]] U32 IAbs(const U32& value);
212 [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
213 [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
214 [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
215 [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
216 [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
217 [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
218 [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
219 const U32& count);
220 [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
221 bool is_signed = false);
222 [[nodiscard]] U32 BitReverse(const U32& value);
223 [[nodiscard]] U32 BitCount(const U32& value);
224 [[nodiscard]] U32 BitwiseNot(const U32& value);
225
226 [[nodiscard]] U32 FindSMsb(const U32& value);
227 [[nodiscard]] U32 FindUMsb(const U32& value);
228 [[nodiscard]] U32 SMin(const U32& a, const U32& b);
229 [[nodiscard]] U32 UMin(const U32& a, const U32& b);
230 [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
231 [[nodiscard]] U32 SMax(const U32& a, const U32& b);
232 [[nodiscard]] U32 UMax(const U32& a, const U32& b);
233 [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
234 [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
235 [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
236
237 [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
238 [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
239 [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
240 [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
241 [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
242 [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
243
244 [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
245 [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
246 [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
247 [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
248 [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
249 [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
250 [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
251 [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
252 [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
253 [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
254 [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
255 [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
256 [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
257
258 [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
259 [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
260 [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
261 [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
262 bool is_signed);
263 [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
264 [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
265 [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
266 bool is_signed);
267 [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
268 [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
269 [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
270 [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
271 [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
272 [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
273
274 [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
275 const FpControl control = {});
276 [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
277 const FpControl control = {});
278 [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
279 const FpControl control = {});
280 [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
281 const FpControl control = {});
282
283 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
284 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
285 [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
286 [[nodiscard]] U1 LogicalNot(const U1& value);
287
288 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
289 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
290 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
291 [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
292 FpControl control = {});
293 [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
294 FpControl control = {});
295 [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
296 const Value& value, FpControl control = {});
297
298 [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
299 [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
300 FpControl control = {});
301
302 [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
303 const F32& bias, const Value& offset,
304 const F32& lod_clamp, TextureInstInfo info);
305 [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
306 const F32& lod, const Value& offset,
307 TextureInstInfo info);
308 [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
309 const F32& dref, const F32& bias,
310 const Value& offset, const F32& lod_clamp,
311 TextureInstInfo info);
312 [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
313 const F32& dref, const F32& lod,
314 const Value& offset, TextureInstInfo info);
315 [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
316
317 [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
318 TextureInstInfo info);
319 [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
320 const Value& offset2, TextureInstInfo info);
321 [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
322 const Value& offset, const Value& offset2, const F32& dref,
323 TextureInstInfo info);
324 [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
325 const U32& lod, const U32& multisampling, TextureInstInfo info);
326 [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
327 const Value& derivates, const Value& offset,
328 const F32& lod_clamp, TextureInstInfo info);
329 [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
330 void ImageWrite(const Value& handle, const Value& coords, const Value& color,
331                 TextureInstInfo info);
332
333 [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
334 const Value& value, TextureInstInfo info);
335 [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
336 const Value& value, TextureInstInfo info);
337 [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
338 const Value& value, TextureInstInfo info);
339 [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
340 const Value& value, bool is_signed, TextureInstInfo info);
341 [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
342 const Value& value, TextureInstInfo info);
343 [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
344 const Value& value, TextureInstInfo info);
345 [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
346 const Value& value, bool is_signed, TextureInstInfo info);
347 [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
348 TextureInstInfo info);
349 [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
350 TextureInstInfo info);
351 [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
352 TextureInstInfo info);
353 [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
354 TextureInstInfo info);
355 [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
356 TextureInstInfo info);
357 [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
358 const Value& value, TextureInstInfo info);
359 [[nodiscard]] U1 VoteAll(const U1& value);
360 [[nodiscard]] U1 VoteAny(const U1& value);
361 [[nodiscard]] U1 VoteEqual(const U1& value);
362 [[nodiscard]] U32 SubgroupBallot(const U1& value);
363 [[nodiscard]] U32 SubgroupEqMask();
364 [[nodiscard]] U32 SubgroupLtMask();
365 [[nodiscard]] U32 SubgroupLeMask();
366 [[nodiscard]] U32 SubgroupGtMask();
367 [[nodiscard]] U32 SubgroupGeMask();
368 [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
369 const IR::U32& seg_mask);
370 [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
371 const IR::U32& seg_mask);
372 [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
373 const IR::U32& seg_mask);
374 [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
375 const IR::U32& clamp, const IR::U32& seg_mask);
376 [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
377 FpControl control = {});
378
379 [[nodiscard]] F32 DPdxFine(const F32& a);
380
381 [[nodiscard]] F32 DPdyFine(const F32& a);
382
383 [[nodiscard]] F32 DPdxCoarse(const F32& a);
384
385 [[nodiscard]] F32 DPdyCoarse(const F32& a);
386
387private:
388 IR::Block::iterator insertion_point;
389
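    // Prepends an instruction with the given opcode and arguments at the insertion point and returns it wrapped in the requested IR value type.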
390 template <typename T = Value, typename... Args>
391 T Inst(Opcode op, Args... args) {
392 auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
393 return T{Value{&*it}};
394 }
395
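    // Proxy carrying per-instruction metadata (e.g. FpControl or TextureInstInfo); it must be trivially copyable and no larger than the 32-bit flags word.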
396 template <typename T>
397 requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
398 Flags() = default;
399 Flags(T proxy_) : proxy{proxy_} {}
400
401 T proxy;
402 };
403
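    // Variant of Inst that bit-copies the flags proxy into the instruction's raw 32-bit flags.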
404 template <typename T = Value, typename FlagType, typename... Args>
405 T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
406 u32 raw_flags{};
407 std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
408 auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
409 return T{Value{&*it}};
410 }
411};
412
413} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 000000000..3dfa5a880
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,411 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
#include <cstring>
6#include <memory>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/type.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::IR {
13namespace {
14void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
15 if (inst && inst->GetOpcode() != opcode) {
16 throw LogicError("Invalid pseudo-instruction");
17 }
18}
19
20void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
21 if (dest_inst) {
22 throw LogicError("Only one of each type of pseudo-op allowed");
23 }
24 dest_inst = pseudo_inst;
25}
26
27void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
28 if (inst->GetOpcode() != expected_opcode) {
29 throw LogicError("Undoing use of invalid pseudo-op");
30 }
31 inst = nullptr;
32}
33
34void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
35 if (!associated_insts) {
36 associated_insts = std::make_unique<AssociatedInsts>();
37 }
38}
39} // Anonymous namespace
40
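// Phi instructions keep their operands in a growable list of (predecessor, value) pairs, while every other opcode uses a fixed argument array; only the member matching the opcode is constructed.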
41Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
42 if (op == Opcode::Phi) {
43 std::construct_at(&phi_args);
44 } else {
45 std::construct_at(&args);
46 }
47}
48
49Inst::~Inst() {
50 if (op == Opcode::Phi) {
51 std::destroy_at(&phi_args);
52 } else {
53 std::destroy_at(&args);
54 }
55}
56
57bool Inst::MayHaveSideEffects() const noexcept {
58 switch (op) {
59 case Opcode::ConditionRef:
60 case Opcode::Reference:
61 case Opcode::PhiMove:
62 case Opcode::Prologue:
63 case Opcode::Epilogue:
64 case Opcode::Join:
65 case Opcode::DemoteToHelperInvocation:
66 case Opcode::Barrier:
67 case Opcode::WorkgroupMemoryBarrier:
68 case Opcode::DeviceMemoryBarrier:
69 case Opcode::EmitVertex:
70 case Opcode::EndPrimitive:
71 case Opcode::SetAttribute:
72 case Opcode::SetAttributeIndexed:
73 case Opcode::SetPatch:
74 case Opcode::SetFragColor:
75 case Opcode::SetSampleMask:
76 case Opcode::SetFragDepth:
77 case Opcode::WriteGlobalU8:
78 case Opcode::WriteGlobalS8:
79 case Opcode::WriteGlobalU16:
80 case Opcode::WriteGlobalS16:
81 case Opcode::WriteGlobal32:
82 case Opcode::WriteGlobal64:
83 case Opcode::WriteGlobal128:
84 case Opcode::WriteStorageU8:
85 case Opcode::WriteStorageS8:
86 case Opcode::WriteStorageU16:
87 case Opcode::WriteStorageS16:
88 case Opcode::WriteStorage32:
89 case Opcode::WriteStorage64:
90 case Opcode::WriteStorage128:
91 case Opcode::WriteLocal:
92 case Opcode::WriteSharedU8:
93 case Opcode::WriteSharedU16:
94 case Opcode::WriteSharedU32:
95 case Opcode::WriteSharedU64:
96 case Opcode::WriteSharedU128:
97 case Opcode::SharedAtomicIAdd32:
98 case Opcode::SharedAtomicSMin32:
99 case Opcode::SharedAtomicUMin32:
100 case Opcode::SharedAtomicSMax32:
101 case Opcode::SharedAtomicUMax32:
102 case Opcode::SharedAtomicInc32:
103 case Opcode::SharedAtomicDec32:
104 case Opcode::SharedAtomicAnd32:
105 case Opcode::SharedAtomicOr32:
106 case Opcode::SharedAtomicXor32:
107 case Opcode::SharedAtomicExchange32:
108 case Opcode::SharedAtomicExchange64:
109 case Opcode::GlobalAtomicIAdd32:
110 case Opcode::GlobalAtomicSMin32:
111 case Opcode::GlobalAtomicUMin32:
112 case Opcode::GlobalAtomicSMax32:
113 case Opcode::GlobalAtomicUMax32:
114 case Opcode::GlobalAtomicInc32:
115 case Opcode::GlobalAtomicDec32:
116 case Opcode::GlobalAtomicAnd32:
117 case Opcode::GlobalAtomicOr32:
118 case Opcode::GlobalAtomicXor32:
119 case Opcode::GlobalAtomicExchange32:
120 case Opcode::GlobalAtomicIAdd64:
121 case Opcode::GlobalAtomicSMin64:
122 case Opcode::GlobalAtomicUMin64:
123 case Opcode::GlobalAtomicSMax64:
124 case Opcode::GlobalAtomicUMax64:
125 case Opcode::GlobalAtomicAnd64:
126 case Opcode::GlobalAtomicOr64:
127 case Opcode::GlobalAtomicXor64:
128 case Opcode::GlobalAtomicExchange64:
129 case Opcode::GlobalAtomicAddF32:
130 case Opcode::GlobalAtomicAddF16x2:
131 case Opcode::GlobalAtomicAddF32x2:
132 case Opcode::GlobalAtomicMinF16x2:
133 case Opcode::GlobalAtomicMinF32x2:
134 case Opcode::GlobalAtomicMaxF16x2:
135 case Opcode::GlobalAtomicMaxF32x2:
136 case Opcode::StorageAtomicIAdd32:
137 case Opcode::StorageAtomicSMin32:
138 case Opcode::StorageAtomicUMin32:
139 case Opcode::StorageAtomicSMax32:
140 case Opcode::StorageAtomicUMax32:
141 case Opcode::StorageAtomicInc32:
142 case Opcode::StorageAtomicDec32:
143 case Opcode::StorageAtomicAnd32:
144 case Opcode::StorageAtomicOr32:
145 case Opcode::StorageAtomicXor32:
146 case Opcode::StorageAtomicExchange32:
147 case Opcode::StorageAtomicIAdd64:
148 case Opcode::StorageAtomicSMin64:
149 case Opcode::StorageAtomicUMin64:
150 case Opcode::StorageAtomicSMax64:
151 case Opcode::StorageAtomicUMax64:
152 case Opcode::StorageAtomicAnd64:
153 case Opcode::StorageAtomicOr64:
154 case Opcode::StorageAtomicXor64:
155 case Opcode::StorageAtomicExchange64:
156 case Opcode::StorageAtomicAddF32:
157 case Opcode::StorageAtomicAddF16x2:
158 case Opcode::StorageAtomicAddF32x2:
159 case Opcode::StorageAtomicMinF16x2:
160 case Opcode::StorageAtomicMinF32x2:
161 case Opcode::StorageAtomicMaxF16x2:
162 case Opcode::StorageAtomicMaxF32x2:
163 case Opcode::BindlessImageWrite:
164 case Opcode::BoundImageWrite:
165 case Opcode::ImageWrite:
166 case IR::Opcode::BindlessImageAtomicIAdd32:
167 case IR::Opcode::BindlessImageAtomicSMin32:
168 case IR::Opcode::BindlessImageAtomicUMin32:
169 case IR::Opcode::BindlessImageAtomicSMax32:
170 case IR::Opcode::BindlessImageAtomicUMax32:
171 case IR::Opcode::BindlessImageAtomicInc32:
172 case IR::Opcode::BindlessImageAtomicDec32:
173 case IR::Opcode::BindlessImageAtomicAnd32:
174 case IR::Opcode::BindlessImageAtomicOr32:
175 case IR::Opcode::BindlessImageAtomicXor32:
176 case IR::Opcode::BindlessImageAtomicExchange32:
177 case IR::Opcode::BoundImageAtomicIAdd32:
178 case IR::Opcode::BoundImageAtomicSMin32:
179 case IR::Opcode::BoundImageAtomicUMin32:
180 case IR::Opcode::BoundImageAtomicSMax32:
181 case IR::Opcode::BoundImageAtomicUMax32:
182 case IR::Opcode::BoundImageAtomicInc32:
183 case IR::Opcode::BoundImageAtomicDec32:
184 case IR::Opcode::BoundImageAtomicAnd32:
185 case IR::Opcode::BoundImageAtomicOr32:
186 case IR::Opcode::BoundImageAtomicXor32:
187 case IR::Opcode::BoundImageAtomicExchange32:
188 case IR::Opcode::ImageAtomicIAdd32:
189 case IR::Opcode::ImageAtomicSMin32:
190 case IR::Opcode::ImageAtomicUMin32:
191 case IR::Opcode::ImageAtomicSMax32:
192 case IR::Opcode::ImageAtomicUMax32:
193 case IR::Opcode::ImageAtomicInc32:
194 case IR::Opcode::ImageAtomicDec32:
195 case IR::Opcode::ImageAtomicAnd32:
196 case IR::Opcode::ImageAtomicOr32:
197 case IR::Opcode::ImageAtomicXor32:
198 case IR::Opcode::ImageAtomicExchange32:
199 return true;
200 default:
201 return false;
202 }
203}
204
205bool Inst::IsPseudoInstruction() const noexcept {
206 switch (op) {
207 case Opcode::GetZeroFromOp:
208 case Opcode::GetSignFromOp:
209 case Opcode::GetCarryFromOp:
210 case Opcode::GetOverflowFromOp:
211 case Opcode::GetSparseFromOp:
212 case Opcode::GetInBoundsFromOp:
213 return true;
214 default:
215 return false;
216 }
217}
218
219bool Inst::AreAllArgsImmediates() const {
220 if (op == Opcode::Phi) {
221 throw LogicError("Testing whether all arguments are immediates on a phi instruction");
222 }
223 return std::all_of(args.begin(), args.begin() + NumArgs(),
224 [](const IR::Value& value) { return value.IsImmediate(); });
225}
226
227Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
228 if (!associated_insts) {
229 return nullptr;
230 }
231 switch (opcode) {
232 case Opcode::GetZeroFromOp:
233 CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
234 return associated_insts->zero_inst;
235 case Opcode::GetSignFromOp:
236 CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
237 return associated_insts->sign_inst;
238 case Opcode::GetCarryFromOp:
239 CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
240 return associated_insts->carry_inst;
241 case Opcode::GetOverflowFromOp:
242 CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
243 return associated_insts->overflow_inst;
244 case Opcode::GetSparseFromOp:
245 CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
246 return associated_insts->sparse_inst;
247 case Opcode::GetInBoundsFromOp:
248 CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
249 return associated_insts->in_bounds_inst;
250 default:
251 throw InvalidArgument("{} is not a pseudo-instruction", opcode);
252 }
253}
254
255IR::Type Inst::Type() const {
256 return TypeOf(op);
257}
258
259void Inst::SetArg(size_t index, Value value) {
260 if (index >= NumArgs()) {
261 throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
262 }
263 const IR::Value arg{Arg(index)};
264 if (!arg.IsImmediate()) {
265 UndoUse(arg);
266 }
267 if (!value.IsImmediate()) {
268 Use(value);
269 }
270 if (op == Opcode::Phi) {
271 phi_args[index].second = value;
272 } else {
273 args[index] = value;
274 }
275}
276
277Block* Inst::PhiBlock(size_t index) const {
278 if (op != Opcode::Phi) {
279 throw LogicError("{} is not a Phi instruction", op);
280 }
281 if (index >= phi_args.size()) {
282 throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
283 }
284 return phi_args[index].first;
285}
286
287void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
288 if (!value.IsImmediate()) {
289 Use(value);
290 }
291 phi_args.emplace_back(predecessor, value);
292}
293
294void Inst::Invalidate() {
295 ClearArgs();
296 ReplaceOpcode(Opcode::Void);
297}
298
299void Inst::ClearArgs() {
300 if (op == Opcode::Phi) {
301 for (auto& pair : phi_args) {
302 IR::Value& value{pair.second};
303 if (!value.IsImmediate()) {
304 UndoUse(value);
305 }
306 }
307 phi_args.clear();
308 } else {
309 for (auto& value : args) {
310 if (!value.IsImmediate()) {
311 UndoUse(value);
312 }
313 }
314 // Reset arguments to null
315 // std::memset was measured to be faster on MSVC than std::ranges::fill
316 std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
317 }
318}
319
320void Inst::ReplaceUsesWith(Value replacement) {
321 Invalidate();
322 ReplaceOpcode(Opcode::Identity);
323 if (!replacement.IsImmediate()) {
324 Use(replacement);
325 }
326 args[0] = replacement;
327}
328
329void Inst::ReplaceOpcode(IR::Opcode opcode) {
330 if (opcode == IR::Opcode::Phi) {
331 throw LogicError("Cannot transition into Phi");
332 }
333 if (op == Opcode::Phi) {
334 // Transition out of phi arguments into non-phi
335 std::destroy_at(&phi_args);
336 std::construct_at(&args);
337 }
338 op = opcode;
339}
340
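// Registers this instruction as a pseudo-operation reader of the consumed value so GetAssociatedPseudoOperation can later locate it.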
341void Inst::Use(const Value& value) {
342 Inst* const inst{value.Inst()};
343 ++inst->use_count;
344
345 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
346 switch (op) {
347 case Opcode::GetZeroFromOp:
348 AllocAssociatedInsts(assoc_inst);
349 SetPseudoInstruction(assoc_inst->zero_inst, this);
350 break;
351 case Opcode::GetSignFromOp:
352 AllocAssociatedInsts(assoc_inst);
353 SetPseudoInstruction(assoc_inst->sign_inst, this);
354 break;
355 case Opcode::GetCarryFromOp:
356 AllocAssociatedInsts(assoc_inst);
357 SetPseudoInstruction(assoc_inst->carry_inst, this);
358 break;
359 case Opcode::GetOverflowFromOp:
360 AllocAssociatedInsts(assoc_inst);
361 SetPseudoInstruction(assoc_inst->overflow_inst, this);
362 break;
363 case Opcode::GetSparseFromOp:
364 AllocAssociatedInsts(assoc_inst);
365 SetPseudoInstruction(assoc_inst->sparse_inst, this);
366 break;
367 case Opcode::GetInBoundsFromOp:
368 AllocAssociatedInsts(assoc_inst);
369 SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
370 break;
371 default:
372 break;
373 }
374}
375
376void Inst::UndoUse(const Value& value) {
377 Inst* const inst{value.Inst()};
378 --inst->use_count;
379
380 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
381 switch (op) {
382 case Opcode::GetZeroFromOp:
383 AllocAssociatedInsts(assoc_inst);
384 RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
385 break;
386 case Opcode::GetSignFromOp:
387 AllocAssociatedInsts(assoc_inst);
388 RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
389 break;
390 case Opcode::GetCarryFromOp:
391 AllocAssociatedInsts(assoc_inst);
392 RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
393 break;
394 case Opcode::GetOverflowFromOp:
395 AllocAssociatedInsts(assoc_inst);
396 RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
397 break;
398 case Opcode::GetSparseFromOp:
399 AllocAssociatedInsts(assoc_inst);
400 RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
401 break;
402 case Opcode::GetInBoundsFromOp:
403 AllocAssociatedInsts(assoc_inst);
404 RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
405 break;
406 default:
407 break;
408 }
409}
410
411} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..77cda1f8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,49 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/shader_info.h"
10
11namespace Shader::IR {
12
13enum class FmzMode : u8 {
14 DontCare, // Not specified for this instruction
15 FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
16 FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
17 None, // Denorms are not flushed, NAN is propagated (nouveau)
18};
19
20enum class FpRounding : u8 {
21 DontCare, // Not specified for this instruction
22 RN, // Round to nearest even,
23 RM, // Round towards negative infinity
24 RP, // Round towards positive infinity
25 RZ, // Round towards zero
26};
27
28struct FpControl {
29 bool no_contraction{false};
30 FpRounding rounding{FpRounding::DontCare};
31 FmzMode fmz_mode{FmzMode::DontCare};
32};
33static_assert(sizeof(FpControl) <= sizeof(u32));
34
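// Texture instruction metadata packed into a single 32-bit word so it can be carried in an instruction's flags.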
35union TextureInstInfo {
36 u32 raw;
37 BitField<0, 16, u32> descriptor_index;
38 BitField<16, 3, TextureType> type;
39 BitField<19, 1, u32> is_depth;
40 BitField<20, 1, u32> has_bias;
41 BitField<21, 1, u32> has_lod_clamp;
42 BitField<22, 1, u32> relaxed_precision;
43 BitField<23, 2, u32> gather_component;
44 BitField<25, 2, u32> num_derivates;
45 BitField<27, 3, ImageFormat> image_format;
46};
47static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
48
49} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
new file mode 100644
index 000000000..24d024ad7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -0,0 +1,15 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/frontend/ir/opcodes.h"
8
9namespace Shader::IR {
10
11std::string_view NameOf(Opcode op) {
12 return Detail::META_TABLE[static_cast<size_t>(op)].name;
13}
14
15} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
new file mode 100644
index 000000000..9ab108292
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <string_view>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/frontend/ir/type.h"
14
15namespace Shader::IR {
16
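// The opcode enumeration and the metadata table below are generated from opcodes.inc through the OPCODE X-macro.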
17enum class Opcode {
18#define OPCODE(name, ...) name,
19#include "opcodes.inc"
20#undef OPCODE
21};
22
23namespace Detail {
24struct OpcodeMeta {
25 std::string_view name;
26 Type type;
27 std::array<Type, 5> arg_types;
28};
29
30// using enum Type;
31constexpr Type Void{Type::Void};
32constexpr Type Opaque{Type::Opaque};
33constexpr Type Reg{Type::Reg};
34constexpr Type Pred{Type::Pred};
35constexpr Type Attribute{Type::Attribute};
36constexpr Type Patch{Type::Patch};
37constexpr Type U1{Type::U1};
38constexpr Type U8{Type::U8};
39constexpr Type U16{Type::U16};
40constexpr Type U32{Type::U32};
41constexpr Type U64{Type::U64};
42constexpr Type F16{Type::F16};
43constexpr Type F32{Type::F32};
44constexpr Type F64{Type::F64};
45constexpr Type U32x2{Type::U32x2};
46constexpr Type U32x3{Type::U32x3};
47constexpr Type U32x4{Type::U32x4};
48constexpr Type F16x2{Type::F16x2};
49constexpr Type F16x3{Type::F16x3};
50constexpr Type F16x4{Type::F16x4};
51constexpr Type F32x2{Type::F32x2};
52constexpr Type F32x3{Type::F32x3};
53constexpr Type F32x4{Type::F32x4};
54constexpr Type F64x2{Type::F64x2};
55constexpr Type F64x3{Type::F64x3};
56constexpr Type F64x4{Type::F64x4};
57
58constexpr OpcodeMeta META_TABLE[]{
59#define OPCODE(name_token, type_token, ...) \
60 { \
61 .name{#name_token}, \
62 .type = type_token, \
63 .arg_types{__VA_ARGS__}, \
64 },
65#include "opcodes.inc"
66#undef OPCODE
67};
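// An opcode's argument count is the number of entries before the first Void in its arg_types array.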
68constexpr size_t CalculateNumArgsOf(Opcode op) {
69 const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
70 return static_cast<size_t>(
71 std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
72}
73
74constexpr u8 NUM_ARGS[]{
75#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
76#include "opcodes.inc"
77#undef OPCODE
78};
79} // namespace Detail
80
81/// Get return type of an opcode
82[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
83 return Detail::META_TABLE[static_cast<size_t>(op)].type;
84}
85
86/// Get the number of arguments an opcode accepts
87[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
88 return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
89}
90
91/// Get the required type of an argument of an opcode
92[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
93 return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
94}
95
96/// Get the name of an opcode
97[[nodiscard]] std::string_view NameOf(Opcode op);
98
99} // namespace Shader::IR
100
101template <>
102struct fmt::formatter<Shader::IR::Opcode> {
103 constexpr auto parse(format_parse_context& ctx) {
104 return ctx.begin();
105 }
106 template <typename FormatContext>
107 auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
108 return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
109 }
110};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
6OPCODE(Phi, Opaque, )
7OPCODE(Identity, Opaque, Opaque, )
8OPCODE(Void, Void, )
9OPCODE(ConditionRef, U1, U1, )
10OPCODE(Reference, Void, Opaque, )
11OPCODE(PhiMove, Void, Opaque, Opaque, )
12
13// Special operations
14OPCODE(Prologue, Void, )
15OPCODE(Epilogue, Void, )
16OPCODE(Join, Void, )
17OPCODE(DemoteToHelperInvocation, Void, )
18OPCODE(EmitVertex, Void, U32, )
19OPCODE(EndPrimitive, Void, U32, )
20
21// Barriers
22OPCODE(Barrier, Void, )
23OPCODE(WorkgroupMemoryBarrier, Void, )
24OPCODE(DeviceMemoryBarrier, Void, )
25
26// Context getters/setters
27OPCODE(GetRegister, U32, Reg, )
28OPCODE(SetRegister, Void, Reg, U32, )
29OPCODE(GetPred, U1, Pred, )
30OPCODE(SetPred, Void, Pred, U1, )
31OPCODE(GetGotoVariable, U1, U32, )
32OPCODE(SetGotoVariable, Void, U32, U1, )
33OPCODE(GetIndirectBranchVariable, U32, )
34OPCODE(SetIndirectBranchVariable, Void, U32, )
35OPCODE(GetCbufU8, U32, U32, U32, )
36OPCODE(GetCbufS8, U32, U32, U32, )
37OPCODE(GetCbufU16, U32, U32, U32, )
38OPCODE(GetCbufS16, U32, U32, U32, )
39OPCODE(GetCbufU32, U32, U32, U32, )
40OPCODE(GetCbufF32, F32, U32, U32, )
41OPCODE(GetCbufU32x2, U32x2, U32, U32, )
42OPCODE(GetAttribute, F32, Attribute, U32, )
43OPCODE(SetAttribute, Void, Attribute, F32, U32, )
44OPCODE(GetAttributeIndexed, F32, U32, U32, )
45OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
46OPCODE(GetPatch, F32, Patch, )
47OPCODE(SetPatch, Void, Patch, F32, )
48OPCODE(SetFragColor, Void, U32, U32, F32, )
49OPCODE(SetSampleMask, Void, U32, )
50OPCODE(SetFragDepth, Void, F32, )
51OPCODE(GetZFlag, U1, Void, )
52OPCODE(GetSFlag, U1, Void, )
53OPCODE(GetCFlag, U1, Void, )
54OPCODE(GetOFlag, U1, Void, )
55OPCODE(SetZFlag, Void, U1, )
56OPCODE(SetSFlag, Void, U1, )
57OPCODE(SetCFlag, Void, U1, )
58OPCODE(SetOFlag, Void, U1, )
59OPCODE(WorkgroupId, U32x3, )
60OPCODE(LocalInvocationId, U32x3, )
61OPCODE(InvocationId, U32, )
62OPCODE(SampleId, U32, )
63OPCODE(IsHelperInvocation, U1, )
64OPCODE(YDirection, F32, )
65
66// Undefined
67OPCODE(UndefU1, U1, )
68OPCODE(UndefU8, U8, )
69OPCODE(UndefU16, U16, )
70OPCODE(UndefU32, U32, )
71OPCODE(UndefU64, U64, )
72
73// Memory operations
74OPCODE(LoadGlobalU8, U32, Opaque, )
75OPCODE(LoadGlobalS8, U32, Opaque, )
76OPCODE(LoadGlobalU16, U32, Opaque, )
77OPCODE(LoadGlobalS16, U32, Opaque, )
78OPCODE(LoadGlobal32, U32, Opaque, )
79OPCODE(LoadGlobal64, U32x2, Opaque, )
80OPCODE(LoadGlobal128, U32x4, Opaque, )
81OPCODE(WriteGlobalU8, Void, Opaque, U32, )
82OPCODE(WriteGlobalS8, Void, Opaque, U32, )
83OPCODE(WriteGlobalU16, Void, Opaque, U32, )
84OPCODE(WriteGlobalS16, Void, Opaque, U32, )
85OPCODE(WriteGlobal32, Void, Opaque, U32, )
86OPCODE(WriteGlobal64, Void, Opaque, U32x2, )
87OPCODE(WriteGlobal128, Void, Opaque, U32x4, )
88
89// Storage buffer operations
90OPCODE(LoadStorageU8, U32, U32, U32, )
91OPCODE(LoadStorageS8, U32, U32, U32, )
92OPCODE(LoadStorageU16, U32, U32, U32, )
93OPCODE(LoadStorageS16, U32, U32, U32, )
94OPCODE(LoadStorage32, U32, U32, U32, )
95OPCODE(LoadStorage64, U32x2, U32, U32, )
96OPCODE(LoadStorage128, U32x4, U32, U32, )
97OPCODE(WriteStorageU8, Void, U32, U32, U32, )
98OPCODE(WriteStorageS8, Void, U32, U32, U32, )
99OPCODE(WriteStorageU16, Void, U32, U32, U32, )
100OPCODE(WriteStorageS16, Void, U32, U32, U32, )
101OPCODE(WriteStorage32, Void, U32, U32, U32, )
102OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
103OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
104
105// Local memory operations
106OPCODE(LoadLocal, U32, U32, )
107OPCODE(WriteLocal, Void, U32, U32, )
108
109// Shared memory operations
110OPCODE(LoadSharedU8, U32, U32, )
111OPCODE(LoadSharedS8, U32, U32, )
112OPCODE(LoadSharedU16, U32, U32, )
113OPCODE(LoadSharedS16, U32, U32, )
114OPCODE(LoadSharedU32, U32, U32, )
115OPCODE(LoadSharedU64, U32x2, U32, )
116OPCODE(LoadSharedU128, U32x4, U32, )
117OPCODE(WriteSharedU8, Void, U32, U32, )
118OPCODE(WriteSharedU16, Void, U32, U32, )
119OPCODE(WriteSharedU32, Void, U32, U32, )
120OPCODE(WriteSharedU64, Void, U32, U32x2, )
121OPCODE(WriteSharedU128, Void, U32, U32x4, )
122
123// Vector utility
124OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
125OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
126OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
127OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
128OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
129OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
130OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
131OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
132OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
133OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
134OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
135OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
136OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
137OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
138OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
139OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
140OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
141OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
142OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
143OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
144OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
145OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
146OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
147OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
148OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
149OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
150OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
151OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
152OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
153OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
154OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
155OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
156OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
157OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
158OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
159OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
160
161// Select operations
162OPCODE(SelectU1, U1, U1, U1, U1, )
163OPCODE(SelectU8, U8, U1, U8, U8, )
164OPCODE(SelectU16, U16, U1, U16, U16, )
165OPCODE(SelectU32, U32, U1, U32, U32, )
166OPCODE(SelectU64, U64, U1, U64, U64, )
167OPCODE(SelectF16, F16, U1, F16, F16, )
168OPCODE(SelectF32, F32, U1, F32, F32, )
169OPCODE(SelectF64, F64, U1, F64, F64, )
170
171// Bitwise conversions
172OPCODE(BitCastU16F16, U16, F16, )
173OPCODE(BitCastU32F32, U32, F32, )
174OPCODE(BitCastU64F64, U64, F64, )
175OPCODE(BitCastF16U16, F16, U16, )
176OPCODE(BitCastF32U32, F32, U32, )
177OPCODE(BitCastF64U64, F64, U64, )
178OPCODE(PackUint2x32, U64, U32x2, )
179OPCODE(UnpackUint2x32, U32x2, U64, )
180OPCODE(PackFloat2x16, U32, F16x2, )
181OPCODE(UnpackFloat2x16, F16x2, U32, )
182OPCODE(PackHalf2x16, U32, F32x2, )
183OPCODE(UnpackHalf2x16, F32x2, U32, )
184OPCODE(PackDouble2x32, F64, U32x2, )
185OPCODE(UnpackDouble2x32, U32x2, F64, )
186
187// Pseudo-operation, handled specially at final emit
188OPCODE(GetZeroFromOp, U1, Opaque, )
189OPCODE(GetSignFromOp, U1, Opaque, )
190OPCODE(GetCarryFromOp, U1, Opaque, )
191OPCODE(GetOverflowFromOp, U1, Opaque, )
192OPCODE(GetSparseFromOp, U1, Opaque, )
193OPCODE(GetInBoundsFromOp, U1, Opaque, )
194
195// Floating-point operations
196OPCODE(FPAbs16, F16, F16, )
197OPCODE(FPAbs32, F32, F32, )
198OPCODE(FPAbs64, F64, F64, )
199OPCODE(FPAdd16, F16, F16, F16, )
200OPCODE(FPAdd32, F32, F32, F32, )
201OPCODE(FPAdd64, F64, F64, F64, )
202OPCODE(FPFma16, F16, F16, F16, F16, )
203OPCODE(FPFma32, F32, F32, F32, F32, )
204OPCODE(FPFma64, F64, F64, F64, F64, )
205OPCODE(FPMax32, F32, F32, F32, )
206OPCODE(FPMax64, F64, F64, F64, )
207OPCODE(FPMin32, F32, F32, F32, )
208OPCODE(FPMin64, F64, F64, F64, )
209OPCODE(FPMul16, F16, F16, F16, )
210OPCODE(FPMul32, F32, F32, F32, )
211OPCODE(FPMul64, F64, F64, F64, )
212OPCODE(FPNeg16, F16, F16, )
213OPCODE(FPNeg32, F32, F32, )
214OPCODE(FPNeg64, F64, F64, )
215OPCODE(FPRecip32, F32, F32, )
216OPCODE(FPRecip64, F64, F64, )
217OPCODE(FPRecipSqrt32, F32, F32, )
218OPCODE(FPRecipSqrt64, F64, F64, )
219OPCODE(FPSqrt, F32, F32, )
220OPCODE(FPSin, F32, F32, )
221OPCODE(FPExp2, F32, F32, )
222OPCODE(FPCos, F32, F32, )
223OPCODE(FPLog2, F32, F32, )
224OPCODE(FPSaturate16, F16, F16, )
225OPCODE(FPSaturate32, F32, F32, )
226OPCODE(FPSaturate64, F64, F64, )
227OPCODE(FPClamp16, F16, F16, F16, F16, )
228OPCODE(FPClamp32, F32, F32, F32, F32, )
229OPCODE(FPClamp64, F64, F64, F64, F64, )
230OPCODE(FPRoundEven16, F16, F16, )
231OPCODE(FPRoundEven32, F32, F32, )
232OPCODE(FPRoundEven64, F64, F64, )
233OPCODE(FPFloor16, F16, F16, )
234OPCODE(FPFloor32, F32, F32, )
235OPCODE(FPFloor64, F64, F64, )
236OPCODE(FPCeil16, F16, F16, )
237OPCODE(FPCeil32, F32, F32, )
238OPCODE(FPCeil64, F64, F64, )
239OPCODE(FPTrunc16, F16, F16, )
240OPCODE(FPTrunc32, F32, F32, )
241OPCODE(FPTrunc64, F64, F64, )
242
243OPCODE(FPOrdEqual16, U1, F16, F16, )
244OPCODE(FPOrdEqual32, U1, F32, F32, )
245OPCODE(FPOrdEqual64, U1, F64, F64, )
246OPCODE(FPUnordEqual16, U1, F16, F16, )
247OPCODE(FPUnordEqual32, U1, F32, F32, )
248OPCODE(FPUnordEqual64, U1, F64, F64, )
249OPCODE(FPOrdNotEqual16, U1, F16, F16, )
250OPCODE(FPOrdNotEqual32, U1, F32, F32, )
251OPCODE(FPOrdNotEqual64, U1, F64, F64, )
252OPCODE(FPUnordNotEqual16, U1, F16, F16, )
253OPCODE(FPUnordNotEqual32, U1, F32, F32, )
254OPCODE(FPUnordNotEqual64, U1, F64, F64, )
255OPCODE(FPOrdLessThan16, U1, F16, F16, )
256OPCODE(FPOrdLessThan32, U1, F32, F32, )
257OPCODE(FPOrdLessThan64, U1, F64, F64, )
258OPCODE(FPUnordLessThan16, U1, F16, F16, )
259OPCODE(FPUnordLessThan32, U1, F32, F32, )
260OPCODE(FPUnordLessThan64, U1, F64, F64, )
261OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
262OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
263OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
264OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
265OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
266OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
267OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
268OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
269OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
270OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
271OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
272OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
273OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
274OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
275OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
276OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
277OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
278OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
279OPCODE(FPIsNan16, U1, F16, )
280OPCODE(FPIsNan32, U1, F32, )
281OPCODE(FPIsNan64, U1, F64, )
282
283// Integer operations
284OPCODE(IAdd32, U32, U32, U32, )
285OPCODE(IAdd64, U64, U64, U64, )
286OPCODE(ISub32, U32, U32, U32, )
287OPCODE(ISub64, U64, U64, U64, )
288OPCODE(IMul32, U32, U32, U32, )
289OPCODE(INeg32, U32, U32, )
290OPCODE(INeg64, U64, U64, )
291OPCODE(IAbs32, U32, U32, )
292OPCODE(ShiftLeftLogical32, U32, U32, U32, )
293OPCODE(ShiftLeftLogical64, U64, U64, U32, )
294OPCODE(ShiftRightLogical32, U32, U32, U32, )
295OPCODE(ShiftRightLogical64, U64, U64, U32, )
296OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
297OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
298OPCODE(BitwiseAnd32, U32, U32, U32, )
299OPCODE(BitwiseOr32, U32, U32, U32, )
300OPCODE(BitwiseXor32, U32, U32, U32, )
301OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
302OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
303OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
304OPCODE(BitReverse32, U32, U32, )
305OPCODE(BitCount32, U32, U32, )
306OPCODE(BitwiseNot32, U32, U32, )
307
308OPCODE(FindSMsb32, U32, U32, )
309OPCODE(FindUMsb32, U32, U32, )
310OPCODE(SMin32, U32, U32, U32, )
311OPCODE(UMin32, U32, U32, U32, )
312OPCODE(SMax32, U32, U32, U32, )
313OPCODE(UMax32, U32, U32, U32, )
314OPCODE(SClamp32, U32, U32, U32, U32, )
315OPCODE(UClamp32, U32, U32, U32, U32, )
316OPCODE(SLessThan, U1, U32, U32, )
317OPCODE(ULessThan, U1, U32, U32, )
318OPCODE(IEqual, U1, U32, U32, )
319OPCODE(SLessThanEqual, U1, U32, U32, )
320OPCODE(ULessThanEqual, U1, U32, U32, )
321OPCODE(SGreaterThan, U1, U32, U32, )
322OPCODE(UGreaterThan, U1, U32, U32, )
323OPCODE(INotEqual, U1, U32, U32, )
324OPCODE(SGreaterThanEqual, U1, U32, U32, )
325OPCODE(UGreaterThanEqual, U1, U32, U32, )
326
327// Atomic operations
328OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
329OPCODE(SharedAtomicSMin32, U32, U32, U32, )
330OPCODE(SharedAtomicUMin32, U32, U32, U32, )
331OPCODE(SharedAtomicSMax32, U32, U32, U32, )
332OPCODE(SharedAtomicUMax32, U32, U32, U32, )
333OPCODE(SharedAtomicInc32, U32, U32, U32, )
334OPCODE(SharedAtomicDec32, U32, U32, U32, )
335OPCODE(SharedAtomicAnd32, U32, U32, U32, )
336OPCODE(SharedAtomicOr32, U32, U32, U32, )
337OPCODE(SharedAtomicXor32, U32, U32, U32, )
338OPCODE(SharedAtomicExchange32, U32, U32, U32, )
339OPCODE(SharedAtomicExchange64, U64, U32, U64, )
340
341OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
342OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
343OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
344OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
345OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
346OPCODE(GlobalAtomicInc32, U32, U64, U32, )
347OPCODE(GlobalAtomicDec32, U32, U64, U32, )
348OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
349OPCODE(GlobalAtomicOr32, U32, U64, U32, )
350OPCODE(GlobalAtomicXor32, U32, U64, U32, )
351OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
352OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
353OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
354OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
355OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
356OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
357OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
358OPCODE(GlobalAtomicOr64, U64, U64, U64, )
359OPCODE(GlobalAtomicXor64, U64, U64, U64, )
360OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
361OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
362OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
363OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
364OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
365OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
366OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
367OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
368
369OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
370OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
371OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
372OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
373OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
374OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
375OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
376OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
377OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
378OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
379OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
380OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
381OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
382OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
383OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
384OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
385OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
386OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
387OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
388OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
389OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
390OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
391OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
392OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
393OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
394OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
395OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
396
397// Logical operations
398OPCODE(LogicalOr, U1, U1, U1, )
399OPCODE(LogicalAnd, U1, U1, U1, )
400OPCODE(LogicalXor, U1, U1, U1, )
401OPCODE(LogicalNot, U1, U1, )
402
403// Conversion operations
404OPCODE(ConvertS16F16, U32, F16, )
405OPCODE(ConvertS16F32, U32, F32, )
406OPCODE(ConvertS16F64, U32, F64, )
407OPCODE(ConvertS32F16, U32, F16, )
408OPCODE(ConvertS32F32, U32, F32, )
409OPCODE(ConvertS32F64, U32, F64, )
410OPCODE(ConvertS64F16, U64, F16, )
411OPCODE(ConvertS64F32, U64, F32, )
412OPCODE(ConvertS64F64, U64, F64, )
413OPCODE(ConvertU16F16, U32, F16, )
414OPCODE(ConvertU16F32, U32, F32, )
415OPCODE(ConvertU16F64, U32, F64, )
416OPCODE(ConvertU32F16, U32, F16, )
417OPCODE(ConvertU32F32, U32, F32, )
418OPCODE(ConvertU32F64, U32, F64, )
419OPCODE(ConvertU64F16, U64, F16, )
420OPCODE(ConvertU64F32, U64, F32, )
421OPCODE(ConvertU64F64, U64, F64, )
422OPCODE(ConvertU64U32, U64, U32, )
423OPCODE(ConvertU32U64, U32, U64, )
424OPCODE(ConvertF16F32, F16, F32, )
425OPCODE(ConvertF32F16, F32, F16, )
426OPCODE(ConvertF32F64, F32, F64, )
427OPCODE(ConvertF64F32, F64, F32, )
428OPCODE(ConvertF16S8, F16, U32, )
429OPCODE(ConvertF16S16, F16, U32, )
430OPCODE(ConvertF16S32, F16, U32, )
431OPCODE(ConvertF16S64, F16, U64, )
432OPCODE(ConvertF16U8, F16, U32, )
433OPCODE(ConvertF16U16, F16, U32, )
434OPCODE(ConvertF16U32, F16, U32, )
435OPCODE(ConvertF16U64, F16, U64, )
436OPCODE(ConvertF32S8, F32, U32, )
437OPCODE(ConvertF32S16, F32, U32, )
438OPCODE(ConvertF32S32, F32, U32, )
439OPCODE(ConvertF32S64, F32, U64, )
440OPCODE(ConvertF32U8, F32, U32, )
441OPCODE(ConvertF32U16, F32, U32, )
442OPCODE(ConvertF32U32, F32, U32, )
443OPCODE(ConvertF32U64, F32, U64, )
444OPCODE(ConvertF64S8, F64, U32, )
445OPCODE(ConvertF64S16, F64, U32, )
446OPCODE(ConvertF64S32, F64, U32, )
447OPCODE(ConvertF64S64, F64, U64, )
448OPCODE(ConvertF64U8, F64, U32, )
449OPCODE(ConvertF64U16, F64, U32, )
450OPCODE(ConvertF64U32, F64, U32, )
451OPCODE(ConvertF64U64, F64, U64, )
452
453// Image operations
454OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
455OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
456OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
457OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
458OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
459OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
460OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
461OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
462OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
463OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
464OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
465OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, )
466
467OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
468OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
469OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
470OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
471OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
472OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
473OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
474OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
475OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
476OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
477OPCODE(BoundImageRead, U32x4, U32, Opaque, )
478OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, )
479
480OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
481OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
482OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
483OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
484OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
485OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
486OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
487OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
488OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
489OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
490OPCODE(ImageRead, U32x4, Opaque, Opaque, )
491OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
492
493// Atomic Image operations
494
495OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
496OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, )
497OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, )
498OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, )
499OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, )
500OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, )
501OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, )
502OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, )
503OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, )
504OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, )
505OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, )
506
507OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, )
508OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, )
509OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, )
510OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, )
511OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, )
512OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, )
513OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, )
514OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, )
515OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, )
516OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, )
517OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, )
518
519OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
520OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
521OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
522OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
523OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
524OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
525OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
526OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
527OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
528OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
529OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
530
531// Warp operations
532OPCODE(LaneId, U32, )
533OPCODE(VoteAll, U1, U1, )
534OPCODE(VoteAny, U1, U1, )
535OPCODE(VoteEqual, U1, U1, )
536OPCODE(SubgroupBallot, U32, U1, )
537OPCODE(SubgroupEqMask, U32, )
538OPCODE(SubgroupLtMask, U32, )
539OPCODE(SubgroupLeMask, U32, )
540OPCODE(SubgroupGtMask, U32, )
541OPCODE(SubgroupGeMask, U32, )
542OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
543OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
544OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
545OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
546OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
547OPCODE(DPdxFine, F32, F32, )
548OPCODE(DPdyFine, F32, F32, )
549OPCODE(DPdxCoarse, F32, F32, )
550OPCODE(DPdyCoarse, F32, F32, )
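
The pseudo-operations listed above (GetZeroFromOp, GetSignFromOp, GetCarryFromOp, GetOverflowFromOp, ...) never lower to code on their own; they read side results produced while the parent instruction is emitted. A minimal standalone sketch of that contract, using plain integers instead of the IR types (AddResult and LowerIAdd32 are illustrative names, not project code):

#include <cstdint>

// Side outputs a backend could expose when lowering IAdd32; each field
// corresponds to one of the Get*FromOp pseudo-operations above.
struct AddResult {
    uint32_t value;  // IAdd32 result
    bool carry;      // GetCarryFromOp
    bool overflow;   // GetOverflowFromOp
    bool zero;       // GetZeroFromOp
    bool sign;       // GetSignFromOp
};

AddResult LowerIAdd32(uint32_t a, uint32_t b) {
    const uint32_t sum = a + b;
    return {
        .value = sum,
        .carry = sum < a,                                        // unsigned wrap-around
        .overflow = ((~(a ^ b) & (a ^ sum)) & 0x80000000u) != 0, // operands agree, result differs
        .zero = sum == 0,
        .sign = (sum & 0x80000000u) != 0,
    };
}
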
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp
new file mode 100644
index 000000000..4c956a970
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/ir/patch.h"
7
8namespace Shader::IR {
9
10bool IsGeneric(Patch patch) noexcept {
11 return patch >= Patch::Component0 && patch <= Patch::Component119;
12}
13
14u32 GenericPatchIndex(Patch patch) {
15 if (!IsGeneric(patch)) {
16 throw InvalidArgument("Patch {} is not generic", patch);
17 }
18 return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
19}
20
21u32 GenericPatchElement(Patch patch) {
22 if (!IsGeneric(patch)) {
23 throw InvalidArgument("Patch {} is not generic", patch);
24 }
25 return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
26}
27
28} // namespace Shader::IR
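
For the index math above: generic patch components are packed four per patch attribute, so Component(4*i + e) maps to attribute index i, element e. A small self-contained sketch of the same arithmetic (kComponent0 stands for the raw value of Patch::Component0, which the enum below fixes at 8):

#include <cstdint>

constexpr uint32_t kComponent0 = 8; // six TessellationLod* entries plus two padding entries precede Component0

constexpr uint32_t GenericIndex(uint32_t raw_patch) {
    return (raw_patch - kComponent0) / 4;
}
constexpr uint32_t GenericElement(uint32_t raw_patch) {
    return (raw_patch - kComponent0) % 4;
}

static_assert(GenericIndex(kComponent0 + 5) == 1);   // Component5 -> patch attribute 1
static_assert(GenericElement(kComponent0 + 5) == 1); // Component5 -> element .y
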
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h
new file mode 100644
index 000000000..6d66ff0d6
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.h
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader::IR {
10
11enum class Patch : u64 {
12 TessellationLodLeft,
13 TessellationLodTop,
14 TessellationLodRight,
15 TessellationLodBottom,
16 TessellationLodInteriorU,
17 TessellationLodInteriorV,
18 ComponentPadding0,
19 ComponentPadding1,
20 Component0,
21 Component1,
22 Component2,
23 Component3,
24 Component4,
25 Component5,
26 Component6,
27 Component7,
28 Component8,
29 Component9,
30 Component10,
31 Component11,
32 Component12,
33 Component13,
34 Component14,
35 Component15,
36 Component16,
37 Component17,
38 Component18,
39 Component19,
40 Component20,
41 Component21,
42 Component22,
43 Component23,
44 Component24,
45 Component25,
46 Component26,
47 Component27,
48 Component28,
49 Component29,
50 Component30,
51 Component31,
52 Component32,
53 Component33,
54 Component34,
55 Component35,
56 Component36,
57 Component37,
58 Component38,
59 Component39,
60 Component40,
61 Component41,
62 Component42,
63 Component43,
64 Component44,
65 Component45,
66 Component46,
67 Component47,
68 Component48,
69 Component49,
70 Component50,
71 Component51,
72 Component52,
73 Component53,
74 Component54,
75 Component55,
76 Component56,
77 Component57,
78 Component58,
79 Component59,
80 Component60,
81 Component61,
82 Component62,
83 Component63,
84 Component64,
85 Component65,
86 Component66,
87 Component67,
88 Component68,
89 Component69,
90 Component70,
91 Component71,
92 Component72,
93 Component73,
94 Component74,
95 Component75,
96 Component76,
97 Component77,
98 Component78,
99 Component79,
100 Component80,
101 Component81,
102 Component82,
103 Component83,
104 Component84,
105 Component85,
106 Component86,
107 Component87,
108 Component88,
109 Component89,
110 Component90,
111 Component91,
112 Component92,
113 Component93,
114 Component94,
115 Component95,
116 Component96,
117 Component97,
118 Component98,
119 Component99,
120 Component100,
121 Component101,
122 Component102,
123 Component103,
124 Component104,
125 Component105,
126 Component106,
127 Component107,
128 Component108,
129 Component109,
130 Component110,
131 Component111,
132 Component112,
133 Component113,
134 Component114,
135 Component115,
136 Component116,
137 Component117,
138 Component118,
139 Component119,
140};
141static_assert(static_cast<u64>(Patch::Component119) == 127);
142
143[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
144
145[[nodiscard]] u32 GenericPatchIndex(Patch patch);
146
147[[nodiscard]] u32 GenericPatchElement(Patch patch);
148
149} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
new file mode 100644
index 000000000..16bc44101
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include <boost/container/flat_set.hpp>
8#include <boost/container/small_vector.hpp>
9
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/post_order.h"
12
13namespace Shader::IR {
14
15BlockList PostOrder(const AbstractSyntaxNode& root) {
16 boost::container::small_vector<Block*, 16> block_stack;
17 boost::container::flat_set<Block*> visited;
18 BlockList post_order_blocks;
19
20 if (root.type != AbstractSyntaxNode::Type::Block) {
21 throw LogicError("First node in abstract syntax list root is not a block");
22 }
23 Block* const first_block{root.data.block};
24 visited.insert(first_block);
25 block_stack.push_back(first_block);
26
27 while (!block_stack.empty()) {
28 Block* const block{block_stack.back()};
29 const auto visit{[&](Block* branch) {
30 if (!visited.insert(branch).second) {
31 return false;
32 }
33 // Calling push_back twice is faster than insert on MSVC
34 block_stack.push_back(block);
35 block_stack.push_back(branch);
36 return true;
37 }};
38 block_stack.pop_back();
39 if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
40 post_order_blocks.push_back(block);
41 }
42 }
43 return post_order_blocks;
44}
45
46} // namespace Shader::IR
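
PostOrder above is an iterative depth-first walk: when a block still has an unvisited successor, the block is pushed back onto the stack before that successor, so it is only appended to the output once every successor has been emitted. The same algorithm on a plain adjacency list, as a standalone sketch (std containers stand in for the IR block types):

#include <algorithm>
#include <unordered_set>
#include <vector>

std::vector<int> PostOrder(const std::vector<std::vector<int>>& successors, int root) {
    std::vector<int> stack{root};
    std::unordered_set<int> visited{root};
    std::vector<int> post_order;
    while (!stack.empty()) {
        const int node = stack.back();
        stack.pop_back();
        const auto visit = [&](int next) {
            if (!visited.insert(next).second) {
                return false; // already visited
            }
            stack.push_back(node); // revisit the parent afterwards
            stack.push_back(next); // explore the successor first
            return true;
        };
        if (std::ranges::none_of(successors[node], visit)) {
            post_order.push_back(node); // all successors already emitted
        }
    }
    return post_order;
}
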
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h
new file mode 100644
index 000000000..07bfbadc3
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
10namespace Shader::IR {
11
12BlockList PostOrder(const AbstractSyntaxNode& root);
13
14} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
new file mode 100644
index 000000000..4e7f32423
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9namespace Shader::IR {
10
11enum class Pred : u64 {
12 P0,
13 P1,
14 P2,
15 P3,
16 P4,
17 P5,
18 P6,
19 PT,
20};
21
22constexpr size_t NUM_USER_PREDS = 7;
23constexpr size_t NUM_PREDS = 8;
24
25[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
26 return static_cast<size_t>(pred);
27}
28
29} // namespace Shader::IR
30
31template <>
32struct fmt::formatter<Shader::IR::Pred> {
33 constexpr auto parse(format_parse_context& ctx) {
34 return ctx.begin();
35 }
36 template <typename FormatContext>
37 auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
38 if (pred == Shader::IR::Pred::PT) {
39 return fmt::format_to(ctx.out(), "PT");
40 } else {
41 return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
42 }
43 }
44};
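
The fmt::formatter specialization above lets predicates be passed straight to fmt calls. A short usage sketch, assuming the header is on the include path:

#include <string>

#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/pred.h"

std::string PredExample() {
    // Formats as "pred=P3 true=PT"
    return fmt::format("pred={} true={}", Shader::IR::Pred::P3, Shader::IR::Pred::PT);
}
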
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
new file mode 100644
index 000000000..3fc06f855
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <string>
7
8#include <fmt/format.h>
9
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/program.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::IR {
15
16std::string DumpProgram(const Program& program) {
17 size_t index{0};
18 std::map<const IR::Inst*, size_t> inst_to_index;
19 std::map<const IR::Block*, size_t> block_to_index;
20
21 for (const IR::Block* const block : program.blocks) {
22 block_to_index.emplace(block, index);
23 ++index;
24 }
25 std::string ret;
26 for (const auto& block : program.blocks) {
27 ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
28 }
29 return ret;
30}
31
32} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
new file mode 100644
index 000000000..ebcaa8bc2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9
10#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/program_header.h"
13#include "shader_recompiler/shader_info.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader::IR {
17
18struct Program {
19 AbstractSyntaxList syntax_list;
20 BlockList blocks;
21 BlockList post_order_blocks;
22 Info info;
23 Stage stage{};
24 std::array<u32, 3> workgroup_size{};
25 OutputTopology output_topology{};
26 u32 output_vertices{};
27 u32 invocations{};
28 u32 local_memory_size{};
29 u32 shared_memory_size{};
30 bool is_geometry_passthrough{};
31};
32
33[[nodiscard]] std::string DumpProgram(const Program& program);
34
35} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
new file mode 100644
index 000000000..a4b635792
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -0,0 +1,332 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9#include "common/common_types.h"
10#include "shader_recompiler/exception.h"
11
12namespace Shader::IR {
13
14enum class Reg : u64 {
15 R0,
16 R1,
17 R2,
18 R3,
19 R4,
20 R5,
21 R6,
22 R7,
23 R8,
24 R9,
25 R10,
26 R11,
27 R12,
28 R13,
29 R14,
30 R15,
31 R16,
32 R17,
33 R18,
34 R19,
35 R20,
36 R21,
37 R22,
38 R23,
39 R24,
40 R25,
41 R26,
42 R27,
43 R28,
44 R29,
45 R30,
46 R31,
47 R32,
48 R33,
49 R34,
50 R35,
51 R36,
52 R37,
53 R38,
54 R39,
55 R40,
56 R41,
57 R42,
58 R43,
59 R44,
60 R45,
61 R46,
62 R47,
63 R48,
64 R49,
65 R50,
66 R51,
67 R52,
68 R53,
69 R54,
70 R55,
71 R56,
72 R57,
73 R58,
74 R59,
75 R60,
76 R61,
77 R62,
78 R63,
79 R64,
80 R65,
81 R66,
82 R67,
83 R68,
84 R69,
85 R70,
86 R71,
87 R72,
88 R73,
89 R74,
90 R75,
91 R76,
92 R77,
93 R78,
94 R79,
95 R80,
96 R81,
97 R82,
98 R83,
99 R84,
100 R85,
101 R86,
102 R87,
103 R88,
104 R89,
105 R90,
106 R91,
107 R92,
108 R93,
109 R94,
110 R95,
111 R96,
112 R97,
113 R98,
114 R99,
115 R100,
116 R101,
117 R102,
118 R103,
119 R104,
120 R105,
121 R106,
122 R107,
123 R108,
124 R109,
125 R110,
126 R111,
127 R112,
128 R113,
129 R114,
130 R115,
131 R116,
132 R117,
133 R118,
134 R119,
135 R120,
136 R121,
137 R122,
138 R123,
139 R124,
140 R125,
141 R126,
142 R127,
143 R128,
144 R129,
145 R130,
146 R131,
147 R132,
148 R133,
149 R134,
150 R135,
151 R136,
152 R137,
153 R138,
154 R139,
155 R140,
156 R141,
157 R142,
158 R143,
159 R144,
160 R145,
161 R146,
162 R147,
163 R148,
164 R149,
165 R150,
166 R151,
167 R152,
168 R153,
169 R154,
170 R155,
171 R156,
172 R157,
173 R158,
174 R159,
175 R160,
176 R161,
177 R162,
178 R163,
179 R164,
180 R165,
181 R166,
182 R167,
183 R168,
184 R169,
185 R170,
186 R171,
187 R172,
188 R173,
189 R174,
190 R175,
191 R176,
192 R177,
193 R178,
194 R179,
195 R180,
196 R181,
197 R182,
198 R183,
199 R184,
200 R185,
201 R186,
202 R187,
203 R188,
204 R189,
205 R190,
206 R191,
207 R192,
208 R193,
209 R194,
210 R195,
211 R196,
212 R197,
213 R198,
214 R199,
215 R200,
216 R201,
217 R202,
218 R203,
219 R204,
220 R205,
221 R206,
222 R207,
223 R208,
224 R209,
225 R210,
226 R211,
227 R212,
228 R213,
229 R214,
230 R215,
231 R216,
232 R217,
233 R218,
234 R219,
235 R220,
236 R221,
237 R222,
238 R223,
239 R224,
240 R225,
241 R226,
242 R227,
243 R228,
244 R229,
245 R230,
246 R231,
247 R232,
248 R233,
249 R234,
250 R235,
251 R236,
252 R237,
253 R238,
254 R239,
255 R240,
256 R241,
257 R242,
258 R243,
259 R244,
260 R245,
261 R246,
262 R247,
263 R248,
264 R249,
265 R250,
266 R251,
267 R252,
268 R253,
269 R254,
270 RZ,
271};
272static_assert(static_cast<int>(Reg::RZ) == 255);
273
274constexpr size_t NUM_USER_REGS = 255;
275constexpr size_t NUM_REGS = 256;
276
277[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
278 if (reg == Reg::RZ) {
279 // Adding or subtracting registers from RZ yields RZ
280 return Reg::RZ;
281 }
282 const int result{static_cast<int>(reg) + num};
283 if (result >= static_cast<int>(Reg::RZ)) {
284 throw LogicError("Overflow on register arithmetic");
285 }
286 if (result < 0) {
287 throw LogicError("Underflow on register arithmetic");
288 }
289 return static_cast<Reg>(result);
290}
291
292[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
293 return reg + (-num);
294}
295
296constexpr Reg operator++(Reg& reg) {
297 reg = reg + 1;
298 return reg;
299}
300
301constexpr Reg operator++(Reg& reg, int) {
302 const Reg copy{reg};
303 reg = reg + 1;
304 return copy;
305}
306
307[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
308 return static_cast<size_t>(reg);
309}
310
311[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
312 return RegIndex(reg) % align == 0 || reg == Reg::RZ;
313}
314
315} // namespace Shader::IR
316
317template <>
318struct fmt::formatter<Shader::IR::Reg> {
319 constexpr auto parse(format_parse_context& ctx) {
320 return ctx.begin();
321 }
322 template <typename FormatContext>
323 auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
324 if (reg == Shader::IR::Reg::RZ) {
325 return fmt::format_to(ctx.out(), "RZ");
326 } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) {
327 return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg));
328 } else {
329 throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg));
330 }
331 }
332};
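
Register arithmetic above is range-checked: adding to RZ yields RZ, and any other result that would reach or pass RZ throws. A brief usage sketch, assuming the header is available as shown:

#include "shader_recompiler/frontend/ir/reg.h"

void RegArithmeticExamples() {
    using Shader::IR::Reg;
    static_assert(Reg::R4 + 2 == Reg::R6);  // plain offsets are allowed
    static_assert(Reg::RZ + 10 == Reg::RZ); // RZ absorbs arithmetic
    static_assert(Shader::IR::IsAligned(Reg::R8, 4));
    // Reg::R254 + 1 is not a constant expression: it throws LogicError,
    // because register arithmetic may never produce RZ.
}
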
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
new file mode 100644
index 000000000..f28341bfe
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7
8#include "shader_recompiler/frontend/ir/type.h"
9
10namespace Shader::IR {
11
12std::string NameOf(Type type) {
13 static constexpr std::array names{
14        "Opaque", "Reg", "Pred", "Attribute", "Patch", "U1", "U8", "U16", "U32",
15        "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
16        "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4",
17 };
18 const size_t bits{static_cast<size_t>(type)};
19 if (bits == 0) {
20 return "Void";
21 }
22 std::string result;
23 for (size_t i = 0; i < names.size(); i++) {
24 if ((bits & (size_t{1} << i)) != 0) {
25 if (!result.empty()) {
26 result += '|';
27 }
28 result += names[i];
29 }
30 }
31 return result;
32}
33
34bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
35 return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
36}
37
38} // namespace Shader::IR
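
NameOf above walks the set bits of the flag enum and joins the matching names with '|', while AreTypesCompatible treats Opaque as a wildcard. A small usage sketch, assuming the headers above:

#include <cassert>

#include "shader_recompiler/frontend/ir/type.h"

void TypeNameExamples() {
    using Shader::IR::Type;
    assert(Shader::IR::NameOf(Type::U32) == "U32");
    assert(Shader::IR::NameOf(Type::F32 | Type::F64) == "F32|F64");
    assert(Shader::IR::AreTypesCompatible(Type::Opaque, Type::F16x2)); // Opaque matches anything
}
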
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
new file mode 100644
index 000000000..294b230c4
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -0,0 +1,61 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "common/common_funcs.h"
12#include "shader_recompiler/exception.h"
13
14namespace Shader::IR {
15
16enum class Type {
17 Void = 0,
18 Opaque = 1 << 0,
19 Reg = 1 << 1,
20 Pred = 1 << 2,
21 Attribute = 1 << 3,
22 Patch = 1 << 4,
23 U1 = 1 << 5,
24 U8 = 1 << 6,
25 U16 = 1 << 7,
26 U32 = 1 << 8,
27 U64 = 1 << 9,
28 F16 = 1 << 10,
29 F32 = 1 << 11,
30 F64 = 1 << 12,
31 U32x2 = 1 << 13,
32 U32x3 = 1 << 14,
33 U32x4 = 1 << 15,
34 F16x2 = 1 << 16,
35 F16x3 = 1 << 17,
36 F16x4 = 1 << 18,
37 F32x2 = 1 << 19,
38 F32x3 = 1 << 20,
39 F32x4 = 1 << 21,
40 F64x2 = 1 << 22,
41 F64x3 = 1 << 23,
42 F64x4 = 1 << 24,
43};
44DECLARE_ENUM_FLAG_OPERATORS(Type)
45
46[[nodiscard]] std::string NameOf(Type type);
47
48[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
49
50} // namespace Shader::IR
51
52template <>
53struct fmt::formatter<Shader::IR::Type> {
54 constexpr auto parse(format_parse_context& ctx) {
55 return ctx.begin();
56 }
57 template <typename FormatContext>
58 auto format(const Shader::IR::Type& type, FormatContext& ctx) {
59 return fmt::format_to(ctx.out(), "{}", NameOf(type));
60 }
61};
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
new file mode 100644
index 000000000..d365ea1bc
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/opcodes.h"
6#include "shader_recompiler/frontend/ir/value.h"
7
8namespace Shader::IR {
9
10Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
11
12Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
13
14Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
15
16Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
17
18Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
19
20Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
21
22Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
23
24Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
25
26Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
27
28Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
29
30Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
31
32Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
33
34IR::Type Value::Type() const noexcept {
35 if (IsPhi()) {
36 // The type of a phi node is stored in its flags
37 return inst->Flags<IR::Type>();
38 }
39 if (IsIdentity()) {
40 return inst->Arg(0).Type();
41 }
42 if (type == Type::Opaque) {
43 return inst->Type();
44 }
45 return type;
46}
47
48bool Value::operator==(const Value& other) const {
49 if (type != other.type) {
50 return false;
51 }
52 switch (type) {
53 case Type::Void:
54 return true;
55 case Type::Opaque:
56 return inst == other.inst;
57 case Type::Reg:
58 return reg == other.reg;
59 case Type::Pred:
60 return pred == other.pred;
61 case Type::Attribute:
62 return attribute == other.attribute;
63 case Type::Patch:
64 return patch == other.patch;
65 case Type::U1:
66 return imm_u1 == other.imm_u1;
67 case Type::U8:
68 return imm_u8 == other.imm_u8;
69 case Type::U16:
70 case Type::F16:
71 return imm_u16 == other.imm_u16;
72 case Type::U32:
73 case Type::F32:
74 return imm_u32 == other.imm_u32;
75 case Type::U64:
76 case Type::F64:
77 return imm_u64 == other.imm_u64;
78 case Type::U32x2:
79 case Type::U32x3:
80 case Type::U32x4:
81 case Type::F16x2:
82 case Type::F16x3:
83 case Type::F16x4:
84 case Type::F32x2:
85 case Type::F32x3:
86 case Type::F32x4:
87 case Type::F64x2:
88 case Type::F64x3:
89 case Type::F64x4:
90 break;
91 }
92 throw LogicError("Invalid type {}", type);
93}
94
95bool Value::operator!=(const Value& other) const {
96 return !operator==(other);
97}
98
99} // namespace Shader::IR
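
Note that operator== above compares floating-point immediates through the integer members of the union, i.e. by bit pattern rather than by IEEE rules: two NaN immediates with the same payload compare equal, and +0.0f differs from -0.0f. A standalone sketch of that comparison rule (not project code):

#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

bool BitwiseEqualF32(float lhs, float rhs) {
    return std::bit_cast<uint32_t>(lhs) == std::bit_cast<uint32_t>(rhs);
}

void ImmediateEqualityExamples() {
    assert(BitwiseEqualF32(NAN, NAN));     // identical bit patterns compare equal
    assert(!BitwiseEqualF32(0.0f, -0.0f)); // the sign bit differs
}
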
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
new file mode 100644
index 000000000..0c6bf684d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -0,0 +1,398 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <memory>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include <boost/container/small_vector.hpp>
15#include <boost/intrusive/list.hpp>
16
17#include "common/assert.h"
18#include "common/bit_cast.h"
19#include "common/common_types.h"
20#include "shader_recompiler/exception.h"
21#include "shader_recompiler/frontend/ir/attribute.h"
22#include "shader_recompiler/frontend/ir/opcodes.h"
23#include "shader_recompiler/frontend/ir/patch.h"
24#include "shader_recompiler/frontend/ir/pred.h"
25#include "shader_recompiler/frontend/ir/reg.h"
26#include "shader_recompiler/frontend/ir/type.h"
27#include "shader_recompiler/frontend/ir/value.h"
28
29namespace Shader::IR {
30
31class Block;
32class Inst;
33
34struct AssociatedInsts;
35
36class Value {
37public:
38 Value() noexcept = default;
39 explicit Value(IR::Inst* value) noexcept;
40 explicit Value(IR::Reg value) noexcept;
41 explicit Value(IR::Pred value) noexcept;
42 explicit Value(IR::Attribute value) noexcept;
43 explicit Value(IR::Patch value) noexcept;
44 explicit Value(bool value) noexcept;
45 explicit Value(u8 value) noexcept;
46 explicit Value(u16 value) noexcept;
47 explicit Value(u32 value) noexcept;
48 explicit Value(f32 value) noexcept;
49 explicit Value(u64 value) noexcept;
50 explicit Value(f64 value) noexcept;
51
52 [[nodiscard]] bool IsIdentity() const noexcept;
53 [[nodiscard]] bool IsPhi() const noexcept;
54 [[nodiscard]] bool IsEmpty() const noexcept;
55 [[nodiscard]] bool IsImmediate() const noexcept;
56 [[nodiscard]] IR::Type Type() const noexcept;
57
58 [[nodiscard]] IR::Inst* Inst() const;
59 [[nodiscard]] IR::Inst* InstRecursive() const;
60 [[nodiscard]] IR::Value Resolve() const;
61 [[nodiscard]] IR::Reg Reg() const;
62 [[nodiscard]] IR::Pred Pred() const;
63 [[nodiscard]] IR::Attribute Attribute() const;
64 [[nodiscard]] IR::Patch Patch() const;
65 [[nodiscard]] bool U1() const;
66 [[nodiscard]] u8 U8() const;
67 [[nodiscard]] u16 U16() const;
68 [[nodiscard]] u32 U32() const;
69 [[nodiscard]] f32 F32() const;
70 [[nodiscard]] u64 U64() const;
71 [[nodiscard]] f64 F64() const;
72
73 [[nodiscard]] bool operator==(const Value& other) const;
74 [[nodiscard]] bool operator!=(const Value& other) const;
75
76private:
77 IR::Type type{};
78 union {
79 IR::Inst* inst{};
80 IR::Reg reg;
81 IR::Pred pred;
82 IR::Attribute attribute;
83 IR::Patch patch;
84 bool imm_u1;
85 u8 imm_u8;
86 u16 imm_u16;
87 u32 imm_u32;
88 f32 imm_f32;
89 u64 imm_u64;
90 f64 imm_f64;
91 };
92};
93static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
94static_assert(std::is_trivially_copyable_v<Value>);
95
96template <IR::Type type_>
97class TypedValue : public Value {
98public:
99 TypedValue() = default;
100
101 template <IR::Type other_type>
102 requires((other_type & type_) != IR::Type::Void) explicit(false)
103 TypedValue(const TypedValue<other_type>& value)
104 : Value(value) {}
105
106 explicit TypedValue(const Value& value) : Value(value) {
107 if ((value.Type() & type_) == IR::Type::Void) {
108 throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
109 }
110 }
111
112 explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
113};
114
115class Inst : public boost::intrusive::list_base_hook<> {
116public:
117 explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
118 ~Inst();
119
120 Inst& operator=(const Inst&) = delete;
121 Inst(const Inst&) = delete;
122
123 Inst& operator=(Inst&&) = delete;
124 Inst(Inst&&) = delete;
125
126 /// Get the number of uses this instruction has.
127 [[nodiscard]] int UseCount() const noexcept {
128 return use_count;
129 }
130
131 /// Determines whether this instruction has uses or not.
132 [[nodiscard]] bool HasUses() const noexcept {
133 return use_count > 0;
134 }
135
136 /// Get the opcode this microinstruction represents.
137 [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
138 return op;
139 }
140
141 /// Determines if there is a pseudo-operation associated with this instruction.
142 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
143 return associated_insts != nullptr;
144 }
145
146 /// Determines whether or not this instruction may have side effects.
147 [[nodiscard]] bool MayHaveSideEffects() const noexcept;
148
149 /// Determines whether or not this instruction is a pseudo-instruction.
150 /// Pseudo-instructions depend on their parent instructions for their semantics.
151 [[nodiscard]] bool IsPseudoInstruction() const noexcept;
152
153 /// Determines if all arguments of this instruction are immediates.
154 [[nodiscard]] bool AreAllArgsImmediates() const;
155
156 /// Gets a pseudo-operation associated with this instruction
157 [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
158
159 /// Get the type this instruction returns.
160 [[nodiscard]] IR::Type Type() const;
161
162 /// Get the number of arguments this instruction has.
163 [[nodiscard]] size_t NumArgs() const {
164 return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
165 }
166
167 /// Get the value of a given argument index.
168 [[nodiscard]] Value Arg(size_t index) const noexcept {
169 if (op == IR::Opcode::Phi) {
170 return phi_args[index].second;
171 } else {
172 return args[index];
173 }
174 }
175
176 /// Set the value of a given argument index.
177 void SetArg(size_t index, Value value);
178
179 /// Get a pointer to the block of a phi argument.
180 [[nodiscard]] Block* PhiBlock(size_t index) const;
181 /// Add phi operand to a phi instruction.
182 void AddPhiOperand(Block* predecessor, const Value& value);
183
184 void Invalidate();
185 void ClearArgs();
186
187 void ReplaceUsesWith(Value replacement);
188
189 void ReplaceOpcode(IR::Opcode opcode);
190
191 template <typename FlagsType>
192 requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
193 [[nodiscard]] FlagsType Flags() const noexcept {
194 FlagsType ret;
195 std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
196 return ret;
197 }
198
199 template <typename FlagsType>
200 requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
201 [[nodiscard]] void SetFlags(FlagsType value) noexcept {
202 std::memcpy(&flags, &value, sizeof(value));
203 }
204
205 /// Intrusively store the host definition of this instruction.
206 template <typename DefinitionType>
207 void SetDefinition(DefinitionType def) {
208 definition = Common::BitCast<u32>(def);
209 }
210
211 /// Return the intrusively stored host definition of this instruction.
212 template <typename DefinitionType>
213 [[nodiscard]] DefinitionType Definition() const noexcept {
214 return Common::BitCast<DefinitionType>(definition);
215 }
216
217 /// Destructively remove one reference count from the instruction
218 /// Useful for register allocation
219 void DestructiveRemoveUsage() {
220 --use_count;
221 }
222
223 /// Destructively add usages to the instruction
224 /// Useful for register allocation
225 void DestructiveAddUsage(int count) {
226 use_count += count;
227 }
228
229private:
230 struct NonTriviallyDummy {
231 NonTriviallyDummy() noexcept {}
232 };
233
234 void Use(const Value& value);
235 void UndoUse(const Value& value);
236
237 IR::Opcode op{};
238 int use_count{};
239 u32 flags{};
240 u32 definition{};
241 union {
242 NonTriviallyDummy dummy{};
243 boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
244 std::array<Value, 5> args;
245 };
246 std::unique_ptr<AssociatedInsts> associated_insts;
247};
248static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
249
250struct AssociatedInsts {
251 union {
252 Inst* in_bounds_inst;
253 Inst* sparse_inst;
254 Inst* zero_inst{};
255 };
256 Inst* sign_inst{};
257 Inst* carry_inst{};
258 Inst* overflow_inst{};
259};
260
261using U1 = TypedValue<Type::U1>;
262using U8 = TypedValue<Type::U8>;
263using U16 = TypedValue<Type::U16>;
264using U32 = TypedValue<Type::U32>;
265using U64 = TypedValue<Type::U64>;
266using F16 = TypedValue<Type::F16>;
267using F32 = TypedValue<Type::F32>;
268using F64 = TypedValue<Type::F64>;
269using U32U64 = TypedValue<Type::U32 | Type::U64>;
270using F32F64 = TypedValue<Type::F32 | Type::F64>;
271using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
272using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
273using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
274
275inline bool Value::IsIdentity() const noexcept {
276 return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
277}
278
279inline bool Value::IsPhi() const noexcept {
280 return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
281}
282
283inline bool Value::IsEmpty() const noexcept {
284 return type == Type::Void;
285}
286
287inline bool Value::IsImmediate() const noexcept {
288 IR::Type current_type{type};
289 const IR::Inst* current_inst{inst};
290 while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
291 const Value& arg{current_inst->Arg(0)};
292 current_type = arg.type;
293 current_inst = arg.inst;
294 }
295 return current_type != Type::Opaque;
296}
297
298inline IR::Inst* Value::Inst() const {
299 DEBUG_ASSERT(type == Type::Opaque);
300 return inst;
301}
302
303inline IR::Inst* Value::InstRecursive() const {
304 DEBUG_ASSERT(type == Type::Opaque);
305 if (IsIdentity()) {
306 return inst->Arg(0).InstRecursive();
307 }
308 return inst;
309}
310
311inline IR::Value Value::Resolve() const {
312 if (IsIdentity()) {
313 return inst->Arg(0).Resolve();
314 }
315 return *this;
316}
317
318inline IR::Reg Value::Reg() const {
319 DEBUG_ASSERT(type == Type::Reg);
320 return reg;
321}
322
323inline IR::Pred Value::Pred() const {
324 DEBUG_ASSERT(type == Type::Pred);
325 return pred;
326}
327
328inline IR::Attribute Value::Attribute() const {
329 DEBUG_ASSERT(type == Type::Attribute);
330 return attribute;
331}
332
333inline IR::Patch Value::Patch() const {
334 DEBUG_ASSERT(type == Type::Patch);
335 return patch;
336}
337
338inline bool Value::U1() const {
339 if (IsIdentity()) {
340 return inst->Arg(0).U1();
341 }
342 DEBUG_ASSERT(type == Type::U1);
343 return imm_u1;
344}
345
346inline u8 Value::U8() const {
347 if (IsIdentity()) {
348 return inst->Arg(0).U8();
349 }
350 DEBUG_ASSERT(type == Type::U8);
351 return imm_u8;
352}
353
354inline u16 Value::U16() const {
355 if (IsIdentity()) {
356 return inst->Arg(0).U16();
357 }
358 DEBUG_ASSERT(type == Type::U16);
359 return imm_u16;
360}
361
362inline u32 Value::U32() const {
363 if (IsIdentity()) {
364 return inst->Arg(0).U32();
365 }
366 DEBUG_ASSERT(type == Type::U32);
367 return imm_u32;
368}
369
370inline f32 Value::F32() const {
371 if (IsIdentity()) {
372 return inst->Arg(0).F32();
373 }
374 DEBUG_ASSERT(type == Type::F32);
375 return imm_f32;
376}
377
378inline u64 Value::U64() const {
379 if (IsIdentity()) {
380 return inst->Arg(0).U64();
381 }
382 DEBUG_ASSERT(type == Type::U64);
383 return imm_u64;
384}
385
386inline f64 Value::F64() const {
387 if (IsIdentity()) {
388 return inst->Arg(0).F64();
389 }
390 DEBUG_ASSERT(type == Type::F64);
391 return imm_f64;
392}
393
394[[nodiscard]] inline bool IsPhi(const Inst& inst) {
395 return inst.GetOpcode() == Opcode::Phi;
396}
397
398} // namespace Shader::IR
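
Inst::Flags<T>() and SetFlags() above pack per-instruction modifiers into the 32-bit flags word by memcpy of a small trivially-copyable struct. A standalone sketch of the same packing trick (FpControl here is a hypothetical modifier struct named only for illustration):

#include <cstdint>
#include <cstring>
#include <type_traits>

struct FpControl { // hypothetical per-instruction modifiers, must fit in 32 bits
    uint8_t rounding : 2;
    uint8_t flush_to_zero : 1;
};
static_assert(sizeof(FpControl) <= sizeof(uint32_t));
static_assert(std::is_trivially_copyable_v<FpControl>);

uint32_t PackFlags(FpControl control) {
    uint32_t flags = 0;
    std::memcpy(&flags, &control, sizeof(control));
    return flags;
}

FpControl UnpackFlags(uint32_t flags) {
    FpControl control{};
    std::memcpy(&control, &flags, sizeof(control));
    return control;
}
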
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..1a954a509
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <optional>
8#include <string>
9#include <utility>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/exception.h"
14#include "shader_recompiler/frontend/maxwell/control_flow.h"
15#include "shader_recompiler/frontend/maxwell/decode.h"
16#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
17#include "shader_recompiler/frontend/maxwell/location.h"
18
19namespace Shader::Maxwell::Flow {
20namespace {
21struct Compare {
22 bool operator()(const Block& lhs, Location rhs) const noexcept {
23 return lhs.begin < rhs;
24 }
25
26 bool operator()(Location lhs, const Block& rhs) const noexcept {
27 return lhs < rhs.begin;
28 }
29
30 bool operator()(const Block& lhs, const Block& rhs) const noexcept {
31 return lhs.begin < rhs.begin;
32 }
33};
34
35u32 BranchOffset(Location pc, Instruction inst) {
36 return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
37}
38
39void Split(Block* old_block, Block* new_block, Location pc) {
40 if (pc <= old_block->begin || pc >= old_block->end) {
41 throw InvalidArgument("Invalid address to split={}", pc);
42 }
43 *new_block = Block{};
44 new_block->begin = pc;
45 new_block->end = old_block->end;
46 new_block->end_class = old_block->end_class;
47 new_block->cond = old_block->cond;
48 new_block->stack = old_block->stack;
49 new_block->branch_true = old_block->branch_true;
50 new_block->branch_false = old_block->branch_false;
51 new_block->function_call = old_block->function_call;
52 new_block->return_block = old_block->return_block;
53 new_block->branch_reg = old_block->branch_reg;
54 new_block->branch_offset = old_block->branch_offset;
55 new_block->indirect_branches = std::move(old_block->indirect_branches);
56
57 const Location old_begin{old_block->begin};
58 Stack old_stack{std::move(old_block->stack)};
59 *old_block = Block{};
60 old_block->begin = old_begin;
61 old_block->end = pc;
62 old_block->end_class = EndClass::Branch;
63 old_block->cond = IR::Condition(true);
64 old_block->stack = old_stack;
65 old_block->branch_true = new_block;
66 old_block->branch_false = nullptr;
67}
68
69Token OpcodeToken(Opcode opcode) {
70 switch (opcode) {
71 case Opcode::PBK:
72 case Opcode::BRK:
73 return Token::PBK;
74 case Opcode::PCNT:
75 case Opcode::CONT:
76 return Token::PBK;
77 case Opcode::PEXIT:
78 case Opcode::EXIT:
79 return Token::PEXIT;
80 case Opcode::PLONGJMP:
81 case Opcode::LONGJMP:
82 return Token::PLONGJMP;
83 case Opcode::PRET:
84 case Opcode::RET:
85 case Opcode::CAL:
86 return Token::PRET;
87 case Opcode::SSY:
88 case Opcode::SYNC:
89 return Token::SSY;
90 default:
91 throw InvalidArgument("{}", opcode);
92 }
93}
94
95bool IsAbsoluteJump(Opcode opcode) {
96 switch (opcode) {
97 case Opcode::JCAL:
98 case Opcode::JMP:
99 case Opcode::JMX:
100 return true;
101 default:
102 return false;
103 }
104}
105
106bool HasFlowTest(Opcode opcode) {
107 switch (opcode) {
108 case Opcode::BRA:
109 case Opcode::BRX:
110 case Opcode::EXIT:
111 case Opcode::JMP:
112 case Opcode::JMX:
113 case Opcode::KIL:
114 case Opcode::BRK:
115 case Opcode::CONT:
116 case Opcode::LONGJMP:
117 case Opcode::RET:
118 case Opcode::SYNC:
119 return true;
120 case Opcode::CAL:
121 case Opcode::JCAL:
122 return false;
123 default:
124 throw InvalidArgument("Invalid branch {}", opcode);
125 }
126}
127
128std::string NameOf(const Block& block) {
129 if (block.begin.IsVirtual()) {
130 return fmt::format("\"Virtual {}\"", block.begin);
131 } else {
132 return fmt::format("\"{}\"", block.begin);
133 }
134}
135} // Anonymous namespace
136
137void Stack::Push(Token token, Location target) {
138 entries.push_back({
139 .token = token,
140 .target{target},
141 });
142}
143
144std::pair<Location, Stack> Stack::Pop(Token token) const {
145 const std::optional<Location> pc{Peek(token)};
146 if (!pc) {
147 throw LogicError("Token could not be found");
148 }
149 return {*pc, Remove(token)};
150}
151
152std::optional<Location> Stack::Peek(Token token) const {
153 const auto it{std::find_if(entries.rbegin(), entries.rend(),
154 [token](const auto& entry) { return entry.token == token; })};
155 if (it == entries.rend()) {
156 return std::nullopt;
157 }
158 return it->target;
159}
160
161Stack Stack::Remove(Token token) const {
162 const auto it{std::find_if(entries.rbegin(), entries.rend(),
163 [token](const auto& entry) { return entry.token == token; })};
164 const auto pos{std::distance(entries.rbegin(), it)};
165 Stack result;
166 result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
167 return result;
168}
169
170bool Block::Contains(Location pc) const noexcept {
171 return pc >= begin && pc < end;
172}
173
174Function::Function(ObjectPool<Block>& block_pool, Location start_address)
175 : entrypoint{start_address} {
176 Label& label{labels.emplace_back()};
177 label.address = start_address;
178 label.block = block_pool.Create(Block{});
179 label.block->begin = start_address;
180 label.block->end = start_address;
181 label.block->end_class = EndClass::Branch;
182 label.block->cond = IR::Condition(true);
183 label.block->branch_true = nullptr;
184 label.block->branch_false = nullptr;
185}
186
187CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
188 bool exits_to_dispatcher_)
189 : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
190 exits_to_dispatcher_} {
191 if (exits_to_dispatcher) {
192 dispatch_block = block_pool.Create(Block{});
193 dispatch_block->begin = {};
194 dispatch_block->end = {};
195 dispatch_block->end_class = EndClass::Exit;
196 dispatch_block->cond = IR::Condition(true);
197 dispatch_block->stack = {};
198 dispatch_block->branch_true = nullptr;
199 dispatch_block->branch_false = nullptr;
200 }
201 functions.emplace_back(block_pool, start_address);
202 for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
203 while (!functions[function_id].labels.empty()) {
204 Function& function{functions[function_id]};
205 Label label{function.labels.back()};
206 function.labels.pop_back();
207 AnalyzeLabel(function_id, label);
208 }
209 }
210 if (exits_to_dispatcher) {
211 const auto last_block{functions[0].blocks.rbegin()};
212 dispatch_block->begin = last_block->end + 1;
213 dispatch_block->end = last_block->end + 1;
214 functions[0].blocks.insert(*dispatch_block);
215 }
216}
217
218void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
219 if (InspectVisitedBlocks(function_id, label)) {
220 // Label address has been visited
221 return;
222 }
223 // Try to find the next block
224 Function* const function{&functions[function_id]};
225 Location pc{label.address};
226 const auto next_it{function->blocks.upper_bound(pc, Compare{})};
227 const bool is_last{next_it == function->blocks.end()};
228 Block* const next{is_last ? nullptr : &*next_it};
229 // Insert before the next block
230 Block* const block{label.block};
231 // Analyze instructions until it reaches an already visited block or there's a branch
232 bool is_branch{false};
233 while (!next || pc < next->begin) {
234 is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
235 if (is_branch) {
236 break;
237 }
238 ++pc;
239 }
240 if (!is_branch) {
241        // If the block finished without a branch,
242        // the next instruction has already been visited; jump to it
243 block->end = pc;
244 block->cond = IR::Condition{true};
245 block->branch_true = next;
246 block->branch_false = nullptr;
247 }
248 // Function's pointer might be invalid, resolve it again
249 // Insert the new block
250 functions[function_id].blocks.insert(*block);
251}
252
253bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
254 const Location pc{label.address};
255 Function& function{functions[function_id]};
256 const auto it{
257 std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
258 if (it == function.blocks.end()) {
259 // Address has not been visited
260 return false;
261 }
262 Block* const visited_block{&*it};
263 if (visited_block->begin == pc) {
264 throw LogicError("Dangling block");
265 }
266 Block* const new_block{label.block};
267 Split(visited_block, new_block, pc);
268 function.blocks.insert(it, *new_block);
269 return true;
270}
271
272CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
273 const Instruction inst{env.ReadInstruction(pc.Offset())};
274 const Opcode opcode{Decode(inst.raw)};
275 switch (opcode) {
276 case Opcode::BRA:
277 case Opcode::JMP:
278 case Opcode::RET:
279 if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
280 return AnalysisState::Continue;
281 }
282 switch (opcode) {
283 case Opcode::BRA:
284 case Opcode::JMP:
285 AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
286 break;
287 case Opcode::RET:
288 block->end_class = EndClass::Return;
289 break;
290 default:
291 break;
292 }
293 block->end = pc;
294 return AnalysisState::Branch;
295 case Opcode::BRK:
296 case Opcode::CONT:
297 case Opcode::LONGJMP:
298 case Opcode::SYNC: {
299 if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
300 return AnalysisState::Continue;
301 }
302 const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
303 block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
304 block->end = pc;
305 return AnalysisState::Branch;
306 }
307 case Opcode::KIL: {
308 const Predicate pred{inst.Pred()};
309 const auto ir_pred{static_cast<IR::Pred>(pred.index)};
310 const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
311 AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
312 return AnalysisState::Branch;
313 }
314 case Opcode::PBK:
315 case Opcode::PCNT:
316 case Opcode::PEXIT:
317 case Opcode::PLONGJMP:
318 case Opcode::SSY:
319 block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
320 return AnalysisState::Continue;
321 case Opcode::BRX:
322 case Opcode::JMX:
323 return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
324 case Opcode::EXIT:
325 return AnalyzeEXIT(block, function_id, pc, inst);
326 case Opcode::PRET:
327 throw NotImplementedException("PRET flow analysis");
328 case Opcode::CAL:
329 case Opcode::JCAL: {
330 const bool is_absolute{IsAbsoluteJump(opcode)};
331 const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
332 // Technically CAL pushes a return address onto the PRET stack, but that's implicit in the function call for us
333 // Insert the function into the list if it doesn't exist
334 const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
335 const bool exists{it != functions.end()};
336 const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
337 : functions.size()};
338 if (!exists) {
339 functions.emplace_back(block_pool, cal_pc);
340 }
341 block->end_class = EndClass::Call;
342 block->function_call = call_id;
343 block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
344 block->end = pc;
345 return AnalysisState::Branch;
346 }
347 default:
348 break;
349 }
350 const Predicate pred{inst.Pred()};
351 if (pred == Predicate{true} || pred == Predicate{false}) {
352 return AnalysisState::Continue;
353 }
354 const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
355 AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
356 return AnalysisState::Branch;
357}
358
359void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
360 EndClass insn_end_class, IR::Condition cond) {
361 if (block->begin != pc) {
362 // If the block doesn't start at the conditional instruction,
363 // end it here and add a label at the conditional instruction to visit later
364 block->end = pc;
365 block->cond = IR::Condition{true};
366 block->branch_true = AddLabel(block, block->stack, pc, function_id);
367 block->branch_false = nullptr;
368 return;
369 }
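// Reaching here means the block begins at the predicated instruction itself. A rough
// illustration of the transformation below (addresses hypothetical): a block starting with
// "@P0 EXIT" at 0x48 becomes a virtual block at 0x48's virtual location whose condition is
// P0; its true edge runs the single-instruction conditional block [0x48, 0x50) and its
// false edge continues at the label added for 0x50.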
370 // Create a virtual block and a conditional block
371 Block* const conditional_block{block_pool.Create()};
372 Block virtual_block{};
373 virtual_block.begin = block->begin.Virtual();
374 virtual_block.end = block->begin.Virtual();
375 virtual_block.end_class = EndClass::Branch;
376 virtual_block.stack = block->stack;
377 virtual_block.cond = cond;
378 virtual_block.branch_true = conditional_block;
379 virtual_block.branch_false = nullptr;
380 // Save the contents of the visited block in the conditional block
381 *conditional_block = std::move(*block);
382 // Impersonate the visited block with a virtual block
383 *block = std::move(virtual_block);
384 // Set the end properties of the conditional instruction
385 conditional_block->end = pc + 1;
386 conditional_block->end_class = insn_end_class;
387 // Add a label to the instruction after the conditional instruction
388 Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
389 // Branch to the next instruction from the virtual block
390 block->branch_false = endif_block;
391 // And branch to it from the conditional instruction if it is a branch or a kill instruction.
392 // Kill instructions are treated as branches because they demote the thread to a helper
393 // invocation and execution may continue afterwards.
394 if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
395 conditional_block->cond = IR::Condition{true};
396 conditional_block->branch_true = endif_block;
397 conditional_block->branch_false = nullptr;
398 }
399 // Finally insert the condition block into the list of blocks
400 functions[function_id].blocks.insert(*conditional_block);
401}
402
403bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
404 Opcode opcode) {
405 if (inst.branch.is_cbuf) {
406 throw NotImplementedException("Branch with constant buffer offset");
407 }
408 const Predicate pred{inst.Pred()};
409 if (pred == Predicate{false}) {
410 return false;
411 }
412 const bool has_flow_test{HasFlowTest(opcode)};
413 const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
414 if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
415 block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
416 block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
417 } else {
418 block->cond = IR::Condition{true};
419 }
420 return true;
421}
422
423void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
424 bool is_absolute) {
425 const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
426 block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
427}
428
429CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
430 FunctionId function_id) {
431 const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
432 if (!brx_table) {
433 TrackIndirectBranchTable(env, pc, program_start);
434 throw NotImplementedException("Failed to track indirect branch");
435 }
436 const IR::FlowTest flow_test{inst.branch.flow_test};
437 const Predicate pred{inst.Pred()};
438 if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
439 throw NotImplementedException("Conditional indirect branch");
440 }
441 std::vector<u32> targets;
442 targets.reserve(brx_table->num_entries);
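// Each constant buffer entry is resolved like a branch immediate: relative entries are
// rebased on the BRX's own address, then the tracked branch_offset and an extra 8 bytes are
// added, presumably because relative branches are measured from the following instruction.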
443 for (u32 i = 0; i < brx_table->num_entries; ++i) {
444 u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
445 if (!is_absolute) {
446 target += pc.Offset();
447 }
448 target += static_cast<u32>(brx_table->branch_offset);
449 target += 8;
450 targets.push_back(target);
451 }
452 std::ranges::sort(targets);
453 targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
454
455 block->indirect_branches.reserve(targets.size());
456 for (const u32 target : targets) {
457 Block* const branch{AddLabel(block, block->stack, target, function_id)};
458 block->indirect_branches.push_back({
459 .block = branch,
460 .address = target,
461 });
462 }
463 block->cond = IR::Condition{true};
464 block->end = pc + 1;
465 block->end_class = EndClass::IndirectBranch;
466 block->branch_reg = brx_table->branch_reg;
467 block->branch_offset = brx_table->branch_offset + 8;
468 if (!is_absolute) {
469 block->branch_offset += pc.Offset();
470 }
471 return AnalysisState::Branch;
472}
473
474CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
475 Instruction inst) {
476 const IR::FlowTest flow_test{inst.branch.flow_test};
477 const Predicate pred{inst.Pred()};
478 if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
479 // EXIT will never be taken
480 return AnalysisState::Continue;
481 }
482 if (exits_to_dispatcher && function_id != 0) {
483 throw NotImplementedException("Dispatch EXIT on external function");
484 }
485 if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
486 if (block->stack.Peek(Token::PEXIT).has_value()) {
487 throw NotImplementedException("Conditional EXIT with PEXIT token");
488 }
489 const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
490 if (exits_to_dispatcher) {
491 block->end = pc;
492 block->end_class = EndClass::Branch;
493 block->cond = cond;
494 block->branch_true = dispatch_block;
495 block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
496 return AnalysisState::Branch;
497 }
498 AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
499 return AnalysisState::Branch;
500 }
501 if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
502 const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
503 block->cond = IR::Condition{true};
504 block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
505 block->branch_false = nullptr;
506 return AnalysisState::Branch;
507 }
508 if (exits_to_dispatcher) {
509 block->cond = IR::Condition{true};
510 block->end = pc;
511 block->end_class = EndClass::Branch;
512 block->branch_true = dispatch_block;
513 block->branch_false = nullptr;
514 return AnalysisState::Branch;
515 }
516 block->end = pc + 1;
517 block->end_class = EndClass::Exit;
518 return AnalysisState::Branch;
519}
520
521Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
522 Function& function{functions[function_id]};
523 if (block->begin == pc) {
524 // Jumps to itself
525 return block;
526 }
527 if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
528 // Block already exists and it has been visited
529 if (function.blocks.begin() != it) {
530 // Check if the previous node is the virtual variant of the label
531 // This won't exist if a virtual node is not needed or it hasn't been visited
532 // If it hasn't been visited and a virtual node is needed, this will still behave as
533 // expected, because the node will be impersonated by its virtual node.
534 const auto prev{std::prev(it)};
535 if (it->begin.Virtual() == prev->begin) {
536 return &*prev;
537 }
538 }
539 return &*it;
540 }
541 // Make sure we don't insert the same label twice
542 const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
543 if (label_it != function.labels.end()) {
544 return label_it->block;
545 }
546 Block* const new_block{block_pool.Create()};
547 new_block->begin = pc;
548 new_block->end = pc;
549 new_block->end_class = EndClass::Branch;
550 new_block->cond = IR::Condition(true);
551 new_block->stack = stack;
552 new_block->branch_true = nullptr;
553 new_block->branch_false = nullptr;
554 function.labels.push_back(Label{
555 .address{pc},
556 .block = new_block,
557 .stack{std::move(stack)},
558 });
559 return new_block;
560}
561
562std::string CFG::Dot() const {
563 int node_uid{0};
564
565 std::string dot{"digraph shader {\n"};
566 for (const Function& function : functions) {
567 dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
568 dot += fmt::format("\t\tnode [style=filled];\n");
569 for (const Block& block : function.blocks) {
570 const std::string name{NameOf(block)};
571 const auto add_branch = [&](Block* branch, bool add_label) {
572 dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
573 if (add_label && block.cond != IR::Condition{true} &&
574 block.cond != IR::Condition{false}) {
575 dot += fmt::format(" [label=\"{}\"]", block.cond);
576 }
577 dot += '\n';
578 };
579 dot += fmt::format("\t\t{};\n", name);
580 switch (block.end_class) {
581 case EndClass::Branch:
582 if (block.cond != IR::Condition{false}) {
583 add_branch(block.branch_true, true);
584 }
585 if (block.cond != IR::Condition{true}) {
586 add_branch(block.branch_false, false);
587 }
588 break;
589 case EndClass::IndirectBranch:
590 for (const IndirectBranch& branch : block.indirect_branches) {
591 add_branch(branch.block, false);
592 }
593 break;
594 case EndClass::Call:
595 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
596 dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
597 dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=striped];\n",
598 node_uid, block.function_call);
599 dot += '\n';
600 ++node_uid;
601 break;
602 case EndClass::Exit:
603 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
604 dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=striped];\n",
605 node_uid);
606 ++node_uid;
607 break;
608 case EndClass::Return:
609 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
610 dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=striped];\n",
611 node_uid);
612 ++node_uid;
613 break;
614 case EndClass::Kill:
615 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
616 dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=striped];\n",
617 node_uid);
618 ++node_uid;
619 break;
620 }
621 }
622 if (function.entrypoint == 8) {
623 dot += fmt::format("\t\tlabel = \"main\";\n");
624 } else {
625 dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
626 }
627 dot += "\t}\n";
628 }
629 if (!functions.empty()) {
630 auto& function{functions.front()};
631 if (function.blocks.empty()) {
632 dot += "Start;\n";
633 } else {
634 dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
635 }
636 dot += fmt::format("\tStart [shape=diamond];\n");
637 }
638 dot += "}\n";
639 return dot;
640}
641
642} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <optional>
9#include <span>
10#include <string>
11#include <vector>
12
13#include <boost/container/small_vector.hpp>
14#include <boost/intrusive/set.hpp>
15
16#include "shader_recompiler/environment.h"
17#include "shader_recompiler/frontend/ir/condition.h"
18#include "shader_recompiler/frontend/maxwell/instruction.h"
19#include "shader_recompiler/frontend/maxwell/location.h"
20#include "shader_recompiler/frontend/maxwell/opcodes.h"
21#include "shader_recompiler/object_pool.h"
22
23namespace Shader::Maxwell::Flow {
24
25struct Block;
26
27using FunctionId = size_t;
28
29enum class EndClass {
30 Branch,
31 IndirectBranch,
32 Call,
33 Exit,
34 Return,
35 Kill,
36};
37
38enum class Token {
39 SSY,
40 PBK,
41 PEXIT,
42 PRET,
43 PCNT,
44 PLONGJMP,
45};
46
47struct StackEntry {
48 auto operator<=>(const StackEntry&) const noexcept = default;
49
50 Token token;
51 Location target;
52};
53
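// Models the stack of control-flow tokens: SSY, PBK, PCNT, PEXIT and PLONGJMP push a token
// with a target address, and SYNC, BRK, CONT, EXIT and LONGJMP consume the matching token to
// find where control transfers (see CFG::AnalyzeInst).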
54class Stack {
55public:
56 void Push(Token token, Location target);
57 [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
58 [[nodiscard]] std::optional<Location> Peek(Token token) const;
59 [[nodiscard]] Stack Remove(Token token) const;
60
61private:
62 boost::container::small_vector<StackEntry, 3> entries;
63};
64
65struct IndirectBranch {
66 Block* block;
67 u32 address;
68};
69
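// A node of the control flow graph. Which members are meaningful depends on end_class:
// Branch uses cond/branch_true/branch_false, IndirectBranch uses branch_reg, branch_offset
// and indirect_branches, and Call uses function_call and return_block.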
70struct Block : boost::intrusive::set_base_hook<
71 // Normal link is ~2.5% faster compared to safe link
72 boost::intrusive::link_mode<boost::intrusive::normal_link>> {
73 [[nodiscard]] bool Contains(Location pc) const noexcept;
74
75 bool operator<(const Block& rhs) const noexcept {
76 return begin < rhs.begin;
77 }
78
79 Location begin;
80 Location end;
81 EndClass end_class{};
82 IR::Condition cond{};
83 Stack stack;
84 Block* branch_true{};
85 Block* branch_false{};
86 FunctionId function_call{};
87 Block* return_block{};
88 IR::Reg branch_reg{};
89 s32 branch_offset{};
90 std::vector<IndirectBranch> indirect_branches;
91};
92
93struct Label {
94 Location address;
95 Block* block;
96 Stack stack;
97};
98
99struct Function {
100 explicit Function(ObjectPool<Block>& block_pool, Location start_address);
101
102 Location entrypoint;
103 boost::container::small_vector<Label, 16> labels;
104 boost::intrusive::set<Block> blocks;
105};
106
107class CFG {
108 enum class AnalysisState {
109 Branch,
110 Continue,
111 };
112
113public:
114 explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
115 bool exits_to_dispatcher = false);
116
117 CFG& operator=(const CFG&) = delete;
118 CFG(const CFG&) = delete;
119
120 CFG& operator=(CFG&&) = delete;
121 CFG(CFG&&) = delete;
122
123 [[nodiscard]] std::string Dot() const;
124
125 [[nodiscard]] std::span<const Function> Functions() const noexcept {
126 return std::span(functions.data(), functions.size());
127 }
128 [[nodiscard]] std::span<Function> Functions() noexcept {
129 return std::span(functions.data(), functions.size());
130 }
131
132 [[nodiscard]] bool ExitsToDispatcher() const {
133 return exits_to_dispatcher;
134 }
135
136private:
137 void AnalyzeLabel(FunctionId function_id, Label& label);
138
139 /// Inspect already visited blocks.
140 /// Return true when the block has already been visited
141 bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
142
143 AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
144
145 void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
146 IR::Condition cond);
147
148 /// Return true when the branch instruction is confirmed to be a branch
149 bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
150 Opcode opcode);
151
152 void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
153 bool is_absolute);
154 AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
155 FunctionId function_id);
156 AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
157
158 /// Return the branch target block
159 Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
160
161 Environment& env;
162 ObjectPool<Block>& block_pool;
163 boost::container::small_vector<Function, 1> functions;
164 Location program_start;
165 bool exits_to_dispatcher{};
166 Block* dispatch_block{};
167};
168
169} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <bit>
8#include <memory>
9#include <string_view>
10
11#include "common/common_types.h"
12#include "shader_recompiler/exception.h"
13#include "shader_recompiler/frontend/maxwell/decode.h"
14#include "shader_recompiler/frontend/maxwell/opcodes.h"
15
16namespace Shader::Maxwell {
17namespace {
18struct MaskValue {
19 u64 mask;
20 u64 value;
21};
22
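// Parses an encoding string from maxwell.inc into a (mask, value) pair over the most
// significant bits of the 64-bit instruction word: '1' and '0' pin a bit, '-' is a wildcard
// and spaces are ignored separators. For example, EXIT's "1110 0011 0000 ----" pins the top
// twelve bits.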
23constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
24 u64 mask{};
25 u64 value{};
26 u64 bit{u64(1) << 63};
27 while (*encoding) {
28 switch (*encoding) {
29 case '0':
30 mask |= bit;
31 break;
32 case '1':
33 mask |= bit;
34 value |= bit;
35 break;
36 case '-':
37 break;
38 case ' ':
39 break;
40 default:
41 throw LogicError("Invalid encoding character '{}'", *encoding);
42 }
43 ++encoding;
44 if (*encoding != ' ') {
45 bit >>= 1;
46 }
47 }
48 return MaskValue{.mask = mask, .value = value};
49}
50
51struct InstEncoding {
52 MaskValue mask_value;
53 Opcode opcode;
54};
55constexpr std::array UNORDERED_ENCODINGS{
56#define INST(name, cute, encode) \
57 InstEncoding{ \
58 .mask_value{MaskValueFromEncoding(encode)}, \
59 .opcode = Opcode::name, \
60 },
61#include "maxwell.inc"
62#undef INST
63};
64
65constexpr auto SortedEncodings() {
66 std::array encodings{UNORDERED_ENCODINGS};
67 std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
68 return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
69 });
70 return encodings;
71}
72constexpr auto ENCODINGS{SortedEncodings()};
73
74constexpr int WidestLeftBits() {
75 int bits{64};
76 for (const InstEncoding& encoding : ENCODINGS) {
77 bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
78 }
79 return 64 - bits;
80}
81constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
82constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
83
84constexpr size_t ToFastLookupIndex(u64 value) {
85 return static_cast<size_t>(value >> MASK_SHIFT);
86}
87
88constexpr size_t FastLookupSize() {
89 size_t max_width{};
90 for (const InstEncoding& encoding : ENCODINGS) {
91 max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
92 }
93 return max_width + 1;
94}
95constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
96
97struct InstInfo {
98 [[nodiscard]] u64 Mask() const noexcept {
99 return static_cast<u64>(high_mask) << MASK_SHIFT;
100 }
101
102 [[nodiscard]] u64 Value() const noexcept {
103 return static_cast<u64>(high_value) << MASK_SHIFT;
104 }
105
106 u16 high_mask;
107 u16 high_value;
108 Opcode opcode;
109};
110
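// Builds one bucket of the fast lookup table: the candidate encodings whose high bits are
// compatible with this bucket index. The bucket size of 2 assumes no more than two encodings
// share the same high-bit pattern; if that assumption broke, .at() would throw while the
// table is being built.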
111constexpr auto MakeFastLookupTableIndex(size_t index) {
112 std::array<InstInfo, 2> encodings{};
113 size_t element{};
114 for (const auto& encoding : ENCODINGS) {
115 const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
116 const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
117 if ((index & mask) == value) {
118 encodings.at(element) = InstInfo{
119 .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
120 .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
121 .opcode = encoding.opcode,
122 };
123 ++element;
124 }
125 }
126 return encodings;
127}
128
129/*constexpr*/ auto MakeFastLookupTable() {
130 auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
131 for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
132 (*encodings)[index] = MakeFastLookupTableIndex(index);
133 }
134 return encodings;
135}
136const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
137} // Anonymous namespace
138
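// Two-step decode: the topmost bits of the instruction select a small bucket, then the full
// mask/value of each candidate in that bucket is tested. Because encodings were sorted by
// descending mask popcount, the most specific pattern wins when candidates overlap.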
139Opcode Decode(u64 insn) {
140 const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
141 const auto it{std::ranges::find_if(
142 table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
143 if (it == table.end()) {
144 throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
145 }
146 return it->opcode;
147}
148
149} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11
12[[nodiscard]] Opcode Decode(u64 insn);
13
14} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/decode.h"
10#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
11#include "shader_recompiler/frontend/maxwell/opcodes.h"
12#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
13
14namespace Shader::Maxwell {
15namespace {
16union Encoding {
17 u64 raw;
18 BitField<0, 8, IR::Reg> dest_reg;
19 BitField<8, 8, IR::Reg> src_reg;
20 BitField<20, 19, u64> immediate;
21 BitField<56, 1, u64> is_negative;
22 BitField<20, 24, s64> brx_offset;
23};
24
25template <typename Callable>
26std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
27 while (pos >= block_begin) {
28 const u64 insn{env.ReadInstruction(pos.Offset())};
29 --pos;
30 if (func(insn, Decode(insn))) {
31 return insn;
32 }
33 }
34 return std::nullopt;
35}
36
37std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
38 IR::Reg brx_reg) {
39 return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
40 const LDC::Encoding ldc{insn};
41 return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
42 ldc.mode == LDC::Mode::Default;
43 });
44}
45
46std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
47 IR::Reg ldc_reg) {
48 return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
49 const Encoding shl{insn};
50 return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
51 });
52}
53
54std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
55 IR::Reg shl_reg) {
56 return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
57 const Encoding imnmx{insn};
58 return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
59 });
60}
61} // Anonymous namespace
62
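// Walks backwards from a BRX/JMX to find the table-lookup idiom that feeds it. The expected
// shape is roughly (illustrative SASS, registers hypothetical):
//     IMNMX.U32 R2, R2, <num_entries - 1>, PT  // clamp the index
//     SHL R2, R2, 0x2                          // scale it to a byte offset
//     LDC R2, c[index][offset + R2]            // load the branch target
//     BRX R2 <branch_offset>
// Only the register chain, the IMNMX immediate and its sign bit are inspected; the shift
// amount, for instance, is not validated.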
63std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
64 Location block_begin) {
65 const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
66 const Opcode brx_opcode{Decode(brx_insn)};
67 if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
68 throw LogicError("Tracked instruction is not BRX or JMX");
69 }
70 const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
71 const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
72
73 Location pos{brx_pos};
74 const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
75 if (!ldc_insn) {
76 return std::nullopt;
77 }
78 const LDC::Encoding ldc{*ldc_insn};
79 const u32 cbuf_index{static_cast<u32>(ldc.index)};
80 const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
81 const IR::Reg ldc_reg{ldc.src_reg};
82
83 const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
84 if (!shl_insn) {
85 return std::nullopt;
86 }
87 const Encoding shl{*shl_insn};
88 const IR::Reg shl_reg{shl.src_reg};
89
90 const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
91 if (!imnmx_insn) {
92 return std::nullopt;
93 }
94 const Encoding imnmx{*imnmx_insn};
95 if (imnmx.is_negative != 0) {
96 return std::nullopt;
97 }
98 const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
99 return IndirectBranchTableInfo{
100 .cbuf_index = cbuf_index,
101 .cbuf_offset = cbuf_offset,
102 .num_entries = imnmx_immediate + 1,
103 .branch_offset = brx_offset,
104 .branch_reg = brx_reg,
105 };
106}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/reg.h"
13#include "shader_recompiler/frontend/maxwell/location.h"
14
15namespace Shader::Maxwell {
16
17struct IndirectBranchTableInfo {
18 u32 cbuf_index{};
19 u32 cbuf_offset{};
20 u32 num_entries{};
21 s32 branch_offset{};
22 IR::Reg branch_reg{};
23};
24
25std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
26 Location block_begin);
27
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/flow_test.h"
10#include "shader_recompiler/frontend/ir/reg.h"
11
12namespace Shader::Maxwell {
13
14struct Predicate {
15 Predicate() = default;
16 Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
17 Predicate(bool value) : index{7}, negated{!value} {}
18 Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
19
20 unsigned index;
21 bool negated;
22};
23
24inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
25 return lhs.index == rhs.index && lhs.negated == rhs.negated;
26}
27
28inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
29 return !(lhs == rhs);
30}
31
32union Instruction {
33 Instruction(u64 raw_) : raw{raw_} {}
34
35 u64 raw;
36
37 union {
38 BitField<5, 1, u64> is_cbuf;
39 BitField<0, 5, IR::FlowTest> flow_test;
40
41 [[nodiscard]] u32 Absolute() const noexcept {
42 return static_cast<u32>(absolute);
43 }
44
45 [[nodiscard]] s32 Offset() const noexcept {
46 return static_cast<s32>(offset);
47 }
48
49 private:
50 BitField<20, 24, s64> offset;
51 BitField<20, 32, u64> absolute;
52 } branch;
53
54 [[nodiscard]] Predicate Pred() const noexcept {
55 return Predicate{pred};
56 }
57
58private:
59 BitField<16, 4, u64> pred;
60};
61static_assert(std::is_trivially_copyable_v<Instruction>);
62
63} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <iterator>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "shader_recompiler/exception.h"
14
15namespace Shader::Maxwell {
16
17class Location {
18 static constexpr u32 VIRTUAL_BIAS{4};
19
20public:
21 constexpr Location() = default;
22
23 constexpr Location(u32 initial_offset) : offset{initial_offset} {
24 if (initial_offset % 8 != 0) {
25 throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
26 }
27 Align();
28 }
29
30 constexpr Location Virtual() const noexcept {
31 Location virtual_location;
32 virtual_location.offset = offset - VIRTUAL_BIAS;
33 return virtual_location;
34 }
35
36 [[nodiscard]] constexpr u32 Offset() const noexcept {
37 return offset;
38 }
39
40 [[nodiscard]] constexpr bool IsVirtual() const {
41 return offset % 8 == VIRTUAL_BIAS;
42 }
43
44 constexpr auto operator<=>(const Location&) const noexcept = default;
45
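// Note that pre-increment returns the value prior to stepping and post-increment returns the
// stepped value, the reverse of the usual C++ convention; the control-flow analysis above
// only uses these operators for their stepping side effect.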
46 constexpr Location operator++() noexcept {
47 const Location copy{*this};
48 Step();
49 return copy;
50 }
51
52 constexpr Location operator++(int) noexcept {
53 Step();
54 return *this;
55 }
56
57 constexpr Location operator--() noexcept {
58 const Location copy{*this};
59 Back();
60 return copy;
61 }
62
63 constexpr Location operator--(int) noexcept {
64 Back();
65 return *this;
66 }
67
68 constexpr Location operator+(int number) const {
69 Location new_pc{*this};
70 while (number > 0) {
71 --number;
72 ++new_pc;
73 }
74 while (number < 0) {
75 ++number;
76 --new_pc;
77 }
78 return new_pc;
79 }
80
81 constexpr Location operator-(int number) const {
82 return operator+(-number);
83 }
84
85private:
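// Instruction words are laid out in 32-byte groups whose first 8-byte slot holds a
// scheduling/control word rather than an instruction, so Align, Step and Back skip over
// that slot.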
86 constexpr void Align() {
87 offset += offset % 32 == 0 ? 8 : 0;
88 }
89
90 constexpr void Step() {
91 offset += 8 + (offset % 32 == 24 ? 8 : 0);
92 }
93
94 constexpr void Back() {
95 offset -= 8 + (offset % 32 == 8 ? 8 : 0);
96 }
97
98 u32 offset{0xcccccccc};
99};
100
101} // namespace Shader::Maxwell
102
103template <>
104struct fmt::formatter<Shader::Maxwell::Location> {
105 constexpr auto parse(format_parse_context& ctx) {
106 return ctx.begin();
107 }
108 template <typename FormatContext>
109 auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
110 return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
111 }
112};
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5INST(AL2P, "AL2P", "1110 1111 1010 0---")
6INST(ALD, "ALD", "1110 1111 1101 1---")
7INST(AST, "AST", "1110 1111 1111 0---")
8INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
9INST(ATOM, "ATOM", "1110 1101 ---- ----")
10INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
11INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
12INST(B2R, "B2R", "1111 0000 1011 1---")
13INST(BAR, "BAR", "1111 0000 1010 1---")
14INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
15INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
16INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
17INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
18INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
19INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
20INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
21INST(BPT, "BPT", "1110 0011 1010 ----")
22INST(BRA, "BRA", "1110 0010 0100 ----")
23INST(BRK, "BRK", "1110 0011 0100 ----")
24INST(BRX, "BRX", "1110 0010 0101 ----")
25INST(CAL, "CAL", "1110 0010 0110 ----")
26INST(CCTL, "CCTL", "1110 1111 011- ----")
27INST(CCTLL, "CCTLL", "1110 1111 100- ----")
28INST(CONT, "CONT", "1110 0011 0101 ----")
29INST(CS2R, "CS2R", "0101 0000 1100 1---")
30INST(CSET, "CSET", "0101 0000 1001 1---")
31INST(CSETP, "CSETP", "0101 0000 1010 0---")
32INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
33INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
34INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
35INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
36INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
37INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
38INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
39INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
40INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
41INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
42INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
43INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
44INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
45INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
46INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
47INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
48INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
49INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
50INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
51INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
52INST(EXIT, "EXIT", "1110 0011 0000 ----")
53INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
54INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
55INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
56INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
57INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
58INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
59INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
60INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
61INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
62INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
63INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
64INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
65INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
66INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
67INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
68INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
69INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
70INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
71INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
72INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
73INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
74INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
75INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
76INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
77INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
78INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
79INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
80INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
81INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
82INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
83INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
84INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
85INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
86INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
87INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
88INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
89INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
90INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
91INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
92INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
93INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
94INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
95INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
96INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
97INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
98INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
99INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
100INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
101INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
102INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
103INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
104INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
105INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
106INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
107INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
108INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
109INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
110INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
111INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
112INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
113INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
114INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
115INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
116INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
117INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
118INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
119INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
120INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
121INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
122INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
123INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
124INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
125INST(IADD32I, "IADD32I", "0001 110- ---- ----")
126INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
127INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
128INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
129INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
130INST(IDE, "IDE", "1110 0011 1001 ----")
131INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
132INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
133INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
134INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
135INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
136INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
137INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
138INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
139INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
140INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
141INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
142INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
143INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
144INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
145INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
146INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
147INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
148INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
149INST(IPA, "IPA", "1110 0000 ---- ----")
150INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
151INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
152INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
153INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
154INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
155INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
156INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
157INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
158INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
159INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
160INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
161INST(JCAL, "JCAL", "1110 0010 0010 ----")
162INST(JMP, "JMP", "1110 0010 0001 ----")
163INST(JMX, "JMX", "1110 0010 0000 ----")
164INST(KIL, "KIL", "1110 0011 0011 ----")
165INST(LD, "LD", "100- ---- ---- ----")
166INST(LDC, "LDC", "1110 1111 1001 0---")
167INST(LDG, "LDG", "1110 1110 1101 0---")
168INST(LDL, "LDL", "1110 1111 0100 0---")
169INST(LDS, "LDS", "1110 1111 0100 1---")
170INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
171INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
172INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
173INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
174INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
175INST(LEPC, "LEPC", "0101 0000 1101 0---")
176INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
177INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
178INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
179INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
180INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
181INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
182INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
183INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
184INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
185INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
186INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
187INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
188INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
189INST(MUFU, "MUFU", "0101 0000 1000 0---")
190INST(NOP, "NOP", "0101 0000 1011 0---")
191INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
192INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
193INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
194INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
195INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
196INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
197INST(PBK, "PBK", "1110 0010 1010 ----")
198INST(PCNT, "PCNT", "1110 0010 1011 ----")
199INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
200INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
201INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
202INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
203INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
204INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
205INST(PRET, "PRET", "1110 0010 0111 ----")
206INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
207INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
208INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
209INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
210INST(PSET, "PSET", "0101 0000 1000 1---")
211INST(PSETP, "PSETP", "0101 0000 1001 0---")
212INST(R2B, "R2B", "1111 0000 1100 0---")
213INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
214INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
215INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
216INST(RAM, "RAM", "1110 0011 1000 ----")
217INST(RED, "RED", "1110 1011 1111 1---")
218INST(RET, "RET", "1110 0011 0010 ----")
219INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
220INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
221INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
222INST(RTT, "RTT", "1110 0011 0110 ----")
223INST(S2R, "S2R", "1111 0000 1100 1---")
224INST(SAM, "SAM", "1110 0011 0111 ----")
225INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
226INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
227INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
228INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
229INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
230INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
231INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
232INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
233INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
234INST(SHFL, "SHFL", "1110 1111 0001 0---")
235INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
236INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
237INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
238INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
239INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
240INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
241INST(SSY, "SSY", "1110 0010 1001 ----")
242INST(ST, "ST", "101- ---- ---- ----")
243INST(STG, "STG", "1110 1110 1101 1---")
244INST(STL, "STL", "1110 1111 0101 0---")
245INST(STP, "STP", "1110 1110 1010 0---")
246INST(STS, "STS", "1110 1111 0101 1---")
247INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
248INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
249INST(SULD, "SULD", "1110 1011 000- ----")
250INST(SURED, "SURED", "1110 1011 010- ----")
251INST(SUST, "SUST", "1110 1011 001- ----")
252INST(SYNC, "SYNC", "1111 0000 1111 1---")
253INST(TEX, "TEX", "1100 0--- ---- ----")
254INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
255INST(TEXS, "TEXS", "1101 -00- ---- ----")
256INST(TLD, "TLD", "1101 1100 ---- ----")
257INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
258INST(TLD4, "TLD4", "1100 10-- ---- ----")
259INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
260INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
261INST(TLDS, "TLDS", "1101 -01- ---- ----")
262INST(TMML, "TMML", "1101 1111 0101 1---")
263INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
264INST(TXA, "TXA", "1101 1111 0100 0---")
265INST(TXD, "TXD", "1101 1110 00-- ----")
266INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
267INST(TXQ, "TXQ", "1101 1111 0100 1---")
268INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
269INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
270INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
271INST(VADD, "VADD", "0010 00-- ---- ----")
272INST(VMAD, "VMAD", "0101 1111 ---- ----")
273INST(VMNMX, "VMNMX", "0011 101- ---- ----")
274INST(VOTE, "VOTE", "0101 0000 1101 1---")
275INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
276INST(VSET, "VSET", "0100 000- ---- ----")
277INST(VSETP, "VSETP", "0101 0000 1111 0---")
278INST(VSHL, "VSHL", "0101 0111 ---- ----")
279INST(VSHR, "VSHR", "0101 0110 ---- ----")
280INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
281INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
282INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
283INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
284
285// Removed because its unusual encoding makes the fast lookup tables larger
286// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11namespace {
12constexpr std::array NAME_TABLE{
13#define INST(name, cute, encode) cute,
14#include "maxwell.inc"
15#undef INST
16};
17} // Anonymous namespace
18
19const char* NameOf(Opcode opcode) {
20 if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
21 throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
22 }
23 return NAME_TABLE[static_cast<size_t>(opcode)];
24}
25
26} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9namespace Shader::Maxwell {
10
11enum class Opcode {
12#define INST(name, cute, encode) name,
13#include "maxwell.inc"
14#undef INST
15};
16
17const char* NameOf(Opcode opcode);
18
19} // namespace Shader::Maxwell
20
21template <>
22struct fmt::formatter<Shader::Maxwell::Opcode> {
23 constexpr auto parse(format_parse_context& ctx) {
24 return ctx.begin();
25 }
26 template <typename FormatContext>
27 auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
28 return format_to(ctx.out(), "{}", NameOf(opcode));
29 }
30};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <string>
8#include <unordered_map>
9#include <utility>
10#include <vector>
11#include <version>
12
13#include <fmt/format.h>
14
15#include <boost/intrusive/list.hpp>
16
17#include "shader_recompiler/environment.h"
18#include "shader_recompiler/frontend/ir/basic_block.h"
19#include "shader_recompiler/frontend/ir/ir_emitter.h"
20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/object_pool.h"
24
25namespace Shader::Maxwell {
26namespace {
27struct Statement;
28
29// Use normal_link because we are not guaranteed to destroy the tree in order
30using ListBaseHook =
31 boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
32
33using Tree = boost::intrusive::list<Statement,
34 // Allow using Statement without a definition
35 boost::intrusive::base_hook<ListBaseHook>,
36 // Avoid linear complexity on splice, size is never called
37 boost::intrusive::constant_time_size<false>>;
38using Node = Tree::iterator;
39
40enum class StatementType {
41 Code,
42 Goto,
43 Label,
44 If,
45 Loop,
46 Break,
47 Return,
48 Kill,
49 Unreachable,
50 Function,
51 Identity,
52 Not,
53 Or,
54 SetVariable,
55 SetIndirectBranchVariable,
56 Variable,
57 IndirectBranchCond,
58};
59
60bool HasChildren(StatementType type) {
61 switch (type) {
62 case StatementType::If:
63 case StatementType::Loop:
64 case StatementType::Function:
65 return true;
66 default:
67 return false;
68 }
69}
70
71struct Goto {};
72struct Label {};
73struct If {};
74struct Loop {};
75struct Break {};
76struct Return {};
77struct Kill {};
78struct Unreachable {};
79struct FunctionTag {};
80struct Identity {};
81struct Not {};
82struct Or {};
83struct SetVariable {};
84struct SetIndirectBranchVariable {};
85struct Variable {};
86struct IndirectBranchCond {};
87
88#ifdef _MSC_VER
89#pragma warning(push)
90#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
91#endif
92struct Statement : ListBaseHook {
93 Statement(const Flow::Block* block_, Statement* up_)
94 : block{block_}, up{up_}, type{StatementType::Code} {}
95 Statement(Goto, Statement* cond_, Node label_, Statement* up_)
96 : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
97 Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
98 Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
99 : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
100 Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
101 : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
102 Statement(Break, Statement* cond_, Statement* up_)
103 : cond{cond_}, up{up_}, type{StatementType::Break} {}
104 Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
105 Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
106 Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
107 Statement(FunctionTag) : children{}, type{StatementType::Function} {}
108 Statement(Identity, IR::Condition cond_, Statement* up_)
109 : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
110 Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
111 Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
112 : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
113 Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
114 : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
115 Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
116 : branch_offset{branch_offset_},
117 branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
118 Statement(Variable, u32 id_, Statement* up_)
119 : id{id_}, up{up_}, type{StatementType::Variable} {}
120 Statement(IndirectBranchCond, u32 location_, Statement* up_)
121 : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}
122
123 ~Statement() {
124 if (HasChildren(type)) {
125 std::destroy_at(&children);
126 }
127 }
128
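// Which members of the two unions below are active is determined by `type`; the
// constructors above establish the valid combinations, and the destructor destroys
// `children` only for the statement types that own a Tree.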
129 union {
130 const Flow::Block* block;
131 Node label;
132 Tree children;
133 IR::Condition guest_cond;
134 Statement* op;
135 Statement* op_a;
136 u32 location;
137 s32 branch_offset;
138 };
139 union {
140 Statement* cond;
141 Statement* op_b;
142 u32 id;
143 IR::Reg branch_reg;
144 };
145 Statement* up{};
146 StatementType type;
147};
148#ifdef _MSC_VER
149#pragma warning(pop)
150#endif
151
152std::string DumpExpr(const Statement* stmt) {
153 switch (stmt->type) {
154 case StatementType::Identity:
155 return fmt::format("{}", stmt->guest_cond);
156 case StatementType::Not:
157 return fmt::format("!{}", DumpExpr(stmt->op));
158 case StatementType::Or:
159 return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
160 case StatementType::Variable:
161 return fmt::format("goto_L{}", stmt->id);
162 case StatementType::IndirectBranchCond:
163 return fmt::format("(indirect_branch == {:x})", stmt->location);
164 default:
165 return "<invalid type>";
166 }
167}
168
169[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
170 std::string ret;
171 std::string indent(indentation, ' ');
172 for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
173 switch (stmt->type) {
174 case StatementType::Code:
175 ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
176 stmt->block->begin.Offset(), stmt->block->end.Offset(),
177 reinterpret_cast<uintptr_t>(stmt->block));
178 break;
179 case StatementType::Goto:
180 ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
181 stmt->label->id);
182 break;
183 case StatementType::Label:
184 ret += fmt::format("{}L{}:\n", indent, stmt->id);
185 break;
186 case StatementType::If:
187 ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
188 ret += DumpTree(stmt->children, indentation + 4);
189 ret += fmt::format("{} }}\n", indent);
190 break;
191 case StatementType::Loop:
192 ret += fmt::format("{} do {{\n", indent);
193 ret += DumpTree(stmt->children, indentation + 4);
194 ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
195 break;
196 case StatementType::Break:
197 ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
198 break;
199 case StatementType::Return:
200 ret += fmt::format("{} return;\n", indent);
201 break;
202 case StatementType::Kill:
203 ret += fmt::format("{} kill;\n", indent);
204 break;
205 case StatementType::Unreachable:
206 ret += fmt::format("{} unreachable;\n", indent);
207 break;
208 case StatementType::SetVariable:
209 ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
210 break;
211 case StatementType::SetIndirectBranchVariable:
212 ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
213 stmt->branch_offset);
214 break;
215 case StatementType::Function:
216 case StatementType::Identity:
217 case StatementType::Not:
218 case StatementType::Or:
219 case StatementType::Variable:
220 case StatementType::IndirectBranchCond:
221 throw LogicError("Statement can't be printed");
222 }
223 }
224 return ret;
225}
226
227void SanitizeNoBreaks(const Tree& tree) {
228 if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
229 throw NotImplementedException("Capturing statement with break nodes");
230 }
231}
232
233size_t Level(Node stmt) {
234 size_t level{0};
235 Statement* node{stmt->up};
236 while (node) {
237 ++level;
238 node = node->up;
239 }
240 return level;
241}
242
243bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
244 const size_t goto_level{Level(goto_stmt)};
245 const size_t label_level{Level(label_stmt)};
246 size_t min_level;
247 size_t max_level;
248 Node min;
249 Node max;
250 if (label_level < goto_level) {
251 min_level = label_level;
252 max_level = goto_level;
253 min = label_stmt;
254 max = goto_stmt;
255    } else { // goto_level <= label_level
256 min_level = goto_level;
257 max_level = label_level;
258 min = goto_stmt;
259 max = label_stmt;
260 }
261 while (max_level > min_level) {
262 --max_level;
263 max = max->up;
264 }
265 return min->up == max->up;
266}
267
268bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
269 return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
270}
271
272[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
273 Node it{goto_stmt};
274 do {
275 if (it == label_stmt) {
276 return true;
277 }
278 --it;
279 } while (it != goto_stmt->up->children.begin());
280 while (it != goto_stmt->up->children.end()) {
281 if (it == label_stmt) {
282 return true;
283 }
284 ++it;
285 }
286 return false;
287}
288
289Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
290 Statement* const parent{uncle->up};
291 Statement* it{&*nephew};
292 while (it->up != parent) {
293 it = it->up;
294 }
295 return Tree::s_iterator_to(*it);
296}
297
298bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
299 const Node end{right_sibling->up->children.end()};
300 for (auto it = right_sibling; it != end; ++it) {
301 if (it == left_sibling) {
302 return false;
303 }
304 }
305 return true;
306}
307
308bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
309 const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
310 return AreOrdered(sibling, goto_stmt);
311}
312
313class GotoPass {
314public:
315 explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
316 std::vector gotos{BuildTree(cfg)};
317 const auto end{gotos.rend()};
318 for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
319 RemoveGoto(*goto_stmt);
320 }
321 }
322
323 Statement& RootStatement() noexcept {
324 return root_stmt;
325 }
326
327private:
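    // Eliminates a single goto through structure-preserving transformations: the goto is moved
    // outward, lifted, or moved inward until it becomes a sibling of its label, then a forward
    // goto is rewritten as an if and a backward goto as a do-while loop, mirroring the classic
    // goto-elimination approach for structured control flow.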
328 void RemoveGoto(Node goto_stmt) {
329 // Force goto_stmt and label_stmt to be directly related
330 const Node label_stmt{goto_stmt->label};
331 if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
332 // Move goto_stmt out using outward-movement transformation until it becomes
333 // directly related to label_stmt
334 while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
335 goto_stmt = MoveOutward(goto_stmt);
336 }
337 }
338 // Force goto_stmt and label_stmt to be siblings
339 if (IsDirectlyRelated(goto_stmt, label_stmt)) {
340 const size_t label_level{Level(label_stmt)};
341 size_t goto_level{Level(goto_stmt)};
342 if (goto_level > label_level) {
343 // Move goto_stmt out of its level using outward-movement transformations
344 while (goto_level > label_level) {
345 goto_stmt = MoveOutward(goto_stmt);
346 --goto_level;
347 }
348        } else { // Level(goto_stmt) <= Level(label_stmt)
349 if (NeedsLift(goto_stmt, label_stmt)) {
350 // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
351 // transformations
352 goto_stmt = Lift(goto_stmt);
353 }
354 // Move goto_stmt into label_stmt's level using inward-movement transformation
355 while (goto_level < label_level) {
356 goto_stmt = MoveInward(goto_stmt);
357 ++goto_level;
358 }
359 }
360 }
361 // Expensive operation:
362 // if (!AreSiblings(goto_stmt, label_stmt)) {
363 // throw LogicError("Goto is not a sibling with the label");
364 // }
365        // goto_stmt and label_stmt are now guaranteed to be siblings; eliminate the goto
366 if (std::next(goto_stmt) == label_stmt) {
367 // Simply eliminate the goto if the label is next to it
368 goto_stmt->up->children.erase(goto_stmt);
369 } else if (AreOrdered(goto_stmt, label_stmt)) {
370 // Eliminate goto_stmt with a conditional
371 EliminateAsConditional(goto_stmt, label_stmt);
372 } else {
373 // Eliminate goto_stmt with a loop
374 EliminateAsLoop(goto_stmt, label_stmt);
375 }
376 }
377
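    // Flattens the control flow graph into the statement tree: every block gets a label, its
    // code, and gotos for its outgoing edges (a conditional branch emits a conditional goto
    // followed by an unconditional one; an indirect branch emits one conditional goto per
    // candidate target). Function calls recurse into the callee's blocks at the call site.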
378 std::vector<Node> BuildTree(Flow::CFG& cfg) {
379 u32 label_id{0};
380 std::vector<Node> gotos;
381 Flow::Function& first_function{cfg.Functions().front()};
382 BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
383 return gotos;
384 }
385
386 void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
387 std::vector<Node>& gotos, Node function_insert_point,
388 std::optional<Node> return_label) {
389 Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
390 Tree& root{root_stmt.children};
391 std::unordered_map<Flow::Block*, Node> local_labels;
392 local_labels.reserve(function.blocks.size());
393
394 for (Flow::Block& block : function.blocks) {
395 Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
396 const Node label_it{root.insert(function_insert_point, *label)};
397 local_labels.emplace(&block, label_it);
398 ++label_id;
399 }
400 for (Flow::Block& block : function.blocks) {
401 const Node label{local_labels.at(&block)};
402 // Insertion point
403 const Node ip{std::next(label)};
404
405            // Reset the goto variable at the very front of the tree and right after this block's label
406 const auto make_reset_variable{[&]() -> Statement& {
407 return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
408 }};
409 root.push_front(make_reset_variable());
410 root.insert(ip, make_reset_variable());
411 root.insert(ip, *pool.Create(&block, &root_stmt));
412
413 switch (block.end_class) {
414 case Flow::EndClass::Branch: {
415 Statement* const always_cond{
416 pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
417 if (block.cond == IR::Condition{true}) {
418 const Node true_label{local_labels.at(block.branch_true)};
419 gotos.push_back(
420 root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
421 } else if (block.cond == IR::Condition{false}) {
422 const Node false_label{local_labels.at(block.branch_false)};
423 gotos.push_back(root.insert(
424 ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
425 } else {
426 const Node true_label{local_labels.at(block.branch_true)};
427 const Node false_label{local_labels.at(block.branch_false)};
428 Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
429 gotos.push_back(
430 root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
431 gotos.push_back(root.insert(
432 ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
433 }
434 break;
435 }
436 case Flow::EndClass::IndirectBranch:
437 root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
438 block.branch_offset, &root_stmt));
439 for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
440 const Node indirect_label{local_labels.at(indirect.block)};
441 Statement* cond{
442 pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
443 Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
444 gotos.push_back(root.insert(ip, *goto_stmt));
445 }
446 root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
447 break;
448 case Flow::EndClass::Call: {
449 Flow::Function& call{cfg.Functions()[block.function_call]};
450 const Node call_return_label{local_labels.at(block.return_block)};
451 BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
452 break;
453 }
454 case Flow::EndClass::Exit:
455 root.insert(ip, *pool.Create(Return{}, &root_stmt));
456 break;
457 case Flow::EndClass::Return: {
458 Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
459 auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
460 gotos.push_back(root.insert(ip, *goto_stmt));
461 break;
462 }
463 case Flow::EndClass::Kill:
464 root.insert(ip, *pool.Create(Kill{}, &root_stmt));
465 break;
466 }
467 }
468 }
469
470 void UpdateTreeUp(Statement* tree) {
471 for (Statement& stmt : tree->children) {
472 stmt.up = tree;
473 }
474 }
475
476 void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
477 Tree& body{goto_stmt->up->children};
478 Tree if_body;
479 if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
480 Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
481 Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
482 UpdateTreeUp(if_stmt);
483 body.insert(goto_stmt, *if_stmt);
484 body.erase(goto_stmt);
485 }
486
487 void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
488 Tree& body{goto_stmt->up->children};
489 Tree loop_body;
490 loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
491 Statement* const cond{goto_stmt->cond};
492 Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
493 UpdateTreeUp(loop);
494 body.insert(goto_stmt, *loop);
495 body.erase(goto_stmt);
496 }
497
498 [[nodiscard]] Node MoveOutward(Node goto_stmt) {
499 switch (goto_stmt->up->type) {
500 case StatementType::If:
501 return MoveOutwardIf(goto_stmt);
502 case StatementType::Loop:
503 return MoveOutwardLoop(goto_stmt);
504 default:
505 throw LogicError("Invalid outward movement");
506 }
507 }
508
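    // Moves the goto one level inward, towards the statement containing its label. The goto
    // condition is first stored in the goto_L<id> variable, the statements it used to jump over
    // are wrapped in an if (!goto_L<id>) block, and a new goto re-testing the variable is placed
    // at the start of the nested statement (whose condition is OR'd with the variable for ifs).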
509 [[nodiscard]] Node MoveInward(Node goto_stmt) {
510 Statement* const parent{goto_stmt->up};
511 Tree& body{parent->children};
512 const Node label{goto_stmt->label};
513 const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
514 const u32 label_id{label->id};
515
516 Statement* const goto_cond{goto_stmt->cond};
517 Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
518 body.insert(goto_stmt, *set_var);
519
520 Tree if_body;
521 if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
522 Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
523 Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
524 if (!if_body.empty()) {
525 Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
526 UpdateTreeUp(if_stmt);
527 body.insert(goto_stmt, *if_stmt);
528 }
529 body.erase(goto_stmt);
530
531 switch (label_nested_stmt->type) {
532 case StatementType::If:
533 // Update nested if condition
534 label_nested_stmt->cond =
535 pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
536 break;
537 case StatementType::Loop:
538 break;
539 default:
540 throw LogicError("Invalid inward movement");
541 }
542 Tree& nested_tree{label_nested_stmt->children};
543 Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
544 return nested_tree.insert(nested_tree.begin(), *new_goto);
545 }
546
547 [[nodiscard]] Node Lift(Node goto_stmt) {
548 Statement* const parent{goto_stmt->up};
549 Tree& body{parent->children};
550 const Node label{goto_stmt->label};
551 const u32 label_id{label->id};
552 const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
553
554 Tree loop_body;
555 loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
556 SanitizeNoBreaks(loop_body);
557 Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
558 Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
559 UpdateTreeUp(loop_stmt);
560 body.insert(goto_stmt, *loop_stmt);
561
562 Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
563 loop_stmt->children.push_front(*new_goto);
564 const Node new_goto_node{loop_stmt->children.begin()};
565
566 Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
567 loop_stmt->children.push_back(*set_var);
568
569 body.erase(goto_stmt);
570 return new_goto_node;
571 }
572
573 Node MoveOutwardIf(Node goto_stmt) {
574 const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
575 Tree& body{parent->children};
576 const u32 label_id{goto_stmt->label->id};
577 Statement* const goto_cond{goto_stmt->cond};
578 Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
579 body.insert(goto_stmt, *set_goto_var);
580
581 Tree if_body;
582 if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
583 if_body.pop_front();
584 Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
585 Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
586 Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
587 UpdateTreeUp(if_stmt);
588 body.insert(goto_stmt, *if_stmt);
589
590 body.erase(goto_stmt);
591
592 Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
593 Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
594 Tree& parent_tree{parent->up->children};
595 return parent_tree.insert(std::next(parent), *new_goto);
596 }
597
598 Node MoveOutwardLoop(Node goto_stmt) {
599 Statement* const parent{goto_stmt->up};
600 Tree& body{parent->children};
601 const u32 label_id{goto_stmt->label->id};
602 Statement* const goto_cond{goto_stmt->cond};
603 Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
604 Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
605 Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
606 body.insert(goto_stmt, *set_goto_var);
607 body.insert(goto_stmt, *break_stmt);
608 body.erase(goto_stmt);
609
610 const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
611 Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
612 Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
613 Tree& parent_tree{loop->up->children};
614 return parent_tree.insert(std::next(loop), *new_goto);
615 }
616
617 ObjectPool<Statement>& pool;
618 Statement root_stmt{FunctionTag{}};
619};
620
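// Scans forward through the statement's siblings for the next plain code block that can be reused
// as a merge destination; returns nullptr if the end of the siblings or a statement with children
// (if/loop) is reached first.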
621[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
622 Tree& tree{stmt.up->children};
623 const Node end{tree.end()};
624 Node forward_node{std::next(Tree::s_iterator_to(stmt))};
625 while (forward_node != end && !HasChildren(forward_node->type)) {
626 if (forward_node->type == StatementType::Code) {
627 return &*forward_node;
628 }
629 ++forward_node;
630 }
631 return nullptr;
632}
633
634[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
635 switch (stmt.type) {
636 case StatementType::Identity:
637 return ir.Condition(stmt.guest_cond);
638 case StatementType::Not:
639 return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
640 case StatementType::Or:
641 return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
642 case StatementType::Variable:
643 return ir.GetGotoVariable(stmt.id);
644 case StatementType::IndirectBranchCond:
645 return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
646 default:
647 throw NotImplementedException("Statement type {}", stmt.type);
648 }
649}
650
651class TranslatePass {
652public:
653 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
654 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
655 IR::AbstractSyntaxList& syntax_list_)
656 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
657 syntax_list{syntax_list_} {
658 Visit(root_stmt, nullptr, nullptr);
659
660 IR::Block& first_block{*syntax_list.front().data.block};
661 IR::IREmitter ir(first_block, first_block.begin());
662 ir.Prologue();
663 }
664
665private:
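    // Walks the structured statement tree and lowers each statement into IR blocks and abstract
    // syntax nodes. ensure_block lazily opens a block only when a statement actually emits code;
    // break_block and fallthrough_block carry the targets used by Break statements and by falling
    // off the end of the current tree.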
666 void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
667 IR::Block* current_block{};
668 const auto ensure_block{[&] {
669 if (current_block) {
670 return;
671 }
672 current_block = block_pool.Create(inst_pool);
673 auto& node{syntax_list.emplace_back()};
674 node.type = IR::AbstractSyntaxNode::Type::Block;
675 node.data.block = current_block;
676 }};
677 Tree& tree{parent.children};
678 for (auto it = tree.begin(); it != tree.end(); ++it) {
679 Statement& stmt{*it};
680 switch (stmt.type) {
681 case StatementType::Label:
682 // Labels can be ignored
683 break;
684 case StatementType::Code: {
685 ensure_block();
686 Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
687 break;
688 }
689 case StatementType::SetVariable: {
690 ensure_block();
691 IR::IREmitter ir{*current_block};
692 ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
693 break;
694 }
695 case StatementType::SetIndirectBranchVariable: {
696 ensure_block();
697 IR::IREmitter ir{*current_block};
698 IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
699 ir.SetIndirectBranchVariable(address);
700 break;
701 }
702 case StatementType::If: {
703 ensure_block();
704 IR::Block* const merge_block{MergeBlock(parent, stmt)};
705
706 // Implement if header block
707 IR::IREmitter ir{*current_block};
708 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
709
710 const size_t if_node_index{syntax_list.size()};
711 syntax_list.emplace_back();
712
713 // Visit children
714 const size_t then_block_index{syntax_list.size()};
715 Visit(stmt, break_block, merge_block);
716
717 IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
718 current_block->AddBranch(then_block);
719 current_block->AddBranch(merge_block);
720 current_block = merge_block;
721
722 auto& if_node{syntax_list[if_node_index]};
723 if_node.type = IR::AbstractSyntaxNode::Type::If;
724 if_node.data.if_node.cond = cond;
725 if_node.data.if_node.body = then_block;
726 if_node.data.if_node.merge = merge_block;
727
728 auto& endif_node{syntax_list.emplace_back()};
729 endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
730 endif_node.data.end_if.merge = merge_block;
731
732 auto& merge{syntax_list.emplace_back()};
733 merge.type = IR::AbstractSyntaxNode::Type::Block;
734 merge.data.block = merge_block;
735 break;
736 }
737 case StatementType::Loop: {
738 IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
739 if (current_block) {
740 current_block->AddBranch(loop_header_block);
741 }
742 auto& header_node{syntax_list.emplace_back()};
743 header_node.type = IR::AbstractSyntaxNode::Type::Block;
744 header_node.data.block = loop_header_block;
745
746 IR::Block* const continue_block{block_pool.Create(inst_pool)};
747 IR::Block* const merge_block{MergeBlock(parent, stmt)};
748
749 const size_t loop_node_index{syntax_list.size()};
750 syntax_list.emplace_back();
751
752 // Visit children
753 const size_t body_block_index{syntax_list.size()};
754 Visit(stmt, merge_block, continue_block);
755
756 // The continue block is located at the end of the loop
757 IR::IREmitter ir{*continue_block};
758 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
759
760 IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
761 loop_header_block->AddBranch(body_block);
762
763 continue_block->AddBranch(loop_header_block);
764 continue_block->AddBranch(merge_block);
765
766 current_block = merge_block;
767
768 auto& loop{syntax_list[loop_node_index]};
769 loop.type = IR::AbstractSyntaxNode::Type::Loop;
770 loop.data.loop.body = body_block;
771 loop.data.loop.continue_block = continue_block;
772 loop.data.loop.merge = merge_block;
773
774 auto& continue_block_node{syntax_list.emplace_back()};
775 continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
776 continue_block_node.data.block = continue_block;
777
778 auto& repeat{syntax_list.emplace_back()};
779 repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
780 repeat.data.repeat.cond = cond;
781 repeat.data.repeat.loop_header = loop_header_block;
782 repeat.data.repeat.merge = merge_block;
783
784 auto& merge{syntax_list.emplace_back()};
785 merge.type = IR::AbstractSyntaxNode::Type::Block;
786 merge.data.block = merge_block;
787 break;
788 }
789 case StatementType::Break: {
790 ensure_block();
791 IR::Block* const skip_block{MergeBlock(parent, stmt)};
792
793 IR::IREmitter ir{*current_block};
794 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
795 current_block->AddBranch(break_block);
796 current_block->AddBranch(skip_block);
797 current_block = skip_block;
798
799 auto& break_node{syntax_list.emplace_back()};
800 break_node.type = IR::AbstractSyntaxNode::Type::Break;
801 break_node.data.break_node.cond = cond;
802 break_node.data.break_node.merge = break_block;
803 break_node.data.break_node.skip = skip_block;
804
805 auto& merge{syntax_list.emplace_back()};
806 merge.type = IR::AbstractSyntaxNode::Type::Block;
807 merge.data.block = skip_block;
808 break;
809 }
810 case StatementType::Return: {
811 ensure_block();
812 IR::IREmitter{*current_block}.Epilogue();
813 current_block = nullptr;
814 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
815 break;
816 }
817 case StatementType::Kill: {
818 ensure_block();
819 IR::Block* demote_block{MergeBlock(parent, stmt)};
820 IR::IREmitter{*current_block}.DemoteToHelperInvocation();
821 current_block->AddBranch(demote_block);
822 current_block = demote_block;
823
824 auto& merge{syntax_list.emplace_back()};
825 merge.type = IR::AbstractSyntaxNode::Type::Block;
826 merge.data.block = demote_block;
827 break;
828 }
829 case StatementType::Unreachable: {
830 ensure_block();
831 current_block = nullptr;
832 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
833 break;
834 }
835 default:
836 throw NotImplementedException("Statement type {}", stmt.type);
837 }
838 }
839 if (current_block) {
840 if (fallthrough_block) {
841 current_block->AddBranch(fallthrough_block);
842 } else {
843 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
844 }
845 }
846 }
847
848 IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
849 Statement* merge_stmt{TryFindForwardBlock(stmt)};
850 if (!merge_stmt) {
851 // Create a merge block we can visit later
852 merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
853 parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
854 }
855 return block_pool.Create(inst_pool);
856 }
857
858 ObjectPool<Statement>& stmt_pool;
859 ObjectPool<IR::Inst>& inst_pool;
860 ObjectPool<IR::Block>& block_pool;
861 Environment& env;
862 IR::AbstractSyntaxList& syntax_list;
863
864// TODO: C++20 Remove this when all compilers support constexpr std::vector
865#if __cpp_lib_constexpr_vector >= 201907
866 static constexpr Flow::Block dummy_flow_block;
867#else
868 const Flow::Block dummy_flow_block;
869#endif
870};
871} // Anonymous namespace
872
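// Builds the abstract syntax list in two passes: GotoPass turns the control flow graph into a
// goto-free structured statement tree, and TranslatePass lowers that tree into IR blocks.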
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) {
875 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
880 return syntax_list;
881}
882
883} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h"
13
14namespace Shader::Maxwell {
15
16[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg);
19
20} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21 SAFEADD,
22};
23
24enum class AtomSize : u64 {
25 U32,
26 S32,
27 U64,
28 F32,
29 F16x2,
30 S64,
31};
32
33IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
34 AtomOp op, bool is_signed) {
35 switch (op) {
36 case AtomOp::ADD:
37 return ir.GlobalAtomicIAdd(offset, op_b);
38 case AtomOp::MIN:
39 return ir.GlobalAtomicIMin(offset, op_b, is_signed);
40 case AtomOp::MAX:
41 return ir.GlobalAtomicIMax(offset, op_b, is_signed);
42 case AtomOp::INC:
43 return ir.GlobalAtomicInc(offset, op_b);
44 case AtomOp::DEC:
45 return ir.GlobalAtomicDec(offset, op_b);
46 case AtomOp::AND:
47 return ir.GlobalAtomicAnd(offset, op_b);
48 case AtomOp::OR:
49 return ir.GlobalAtomicOr(offset, op_b);
50 case AtomOp::XOR:
51 return ir.GlobalAtomicXor(offset, op_b);
52 case AtomOp::EXCH:
53 return ir.GlobalAtomicExchange(offset, op_b);
54 default:
55 throw NotImplementedException("Integer Atom Operation {}", op);
56 }
57}
58
59IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
60 AtomSize size) {
61 static constexpr IR::FpControl f16_control{
62 .no_contraction = false,
63 .rounding = IR::FpRounding::RN,
64 .fmz_mode = IR::FmzMode::DontCare,
65 };
66 static constexpr IR::FpControl f32_control{
67 .no_contraction = false,
68 .rounding = IR::FpRounding::RN,
69 .fmz_mode = IR::FmzMode::FTZ,
70 };
71 switch (op) {
72 case AtomOp::ADD:
73 return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
74 : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
75 case AtomOp::MIN:
76 return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
77 case AtomOp::MAX:
78 return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
79 default:
80 throw NotImplementedException("FP Atom Operation {}", op);
81 }
82}
83
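// Computes the 64-bit global memory address for ATOM/RED. The base is a 64-bit register pair when
// the E bit is set, otherwise a zero-extended 32-bit register; the immediate is added as a signed
// offset, except with RZ, where it is taken as an unsigned absolute address.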
84IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
85 union {
86 u64 raw;
87 BitField<8, 8, IR::Reg> addr_reg;
88 BitField<28, 20, s64> addr_offset;
89 BitField<28, 20, u64> rz_addr_offset;
90 BitField<48, 1, u64> e;
91 } const mem{insn};
92
93 const IR::U64 address{[&]() -> IR::U64 {
94 if (mem.e == 0) {
95 return v.ir.UConvert(64, v.X(mem.addr_reg));
96 }
97 return v.L(mem.addr_reg);
98 }()};
99 const u64 addr_offset{[&]() -> u64 {
100 if (mem.addr_reg == IR::Reg::RZ) {
101 // When RZ is used, the address is an absolute address
102 return static_cast<u64>(mem.rz_addr_offset.Value());
103 } else {
104 return static_cast<u64>(mem.addr_offset.Value());
105 }
106 }()};
107 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
108}
109
110bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
111 // TODO: SAFEADD
112 switch (size) {
113 case AtomSize::S32:
114 case AtomSize::U64:
115 return (op == AtomOp::INC || op == AtomOp::DEC);
116 case AtomSize::S64:
117 return !(op == AtomOp::MIN || op == AtomOp::MAX);
118 case AtomSize::F32:
119 return op != AtomOp::ADD;
120 case AtomSize::F16x2:
121 return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
122 default:
123 return false;
124 }
125}
126
127IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
128 switch (size) {
129 case AtomSize::U32:
130 case AtomSize::S32:
131 case AtomSize::F32:
132 case AtomSize::F16x2:
133 return ir.LoadGlobal32(offset);
134 case AtomSize::U64:
135 case AtomSize::S64:
136 return ir.PackUint2x32(ir.LoadGlobal64(offset));
137 default:
138 throw NotImplementedException("Atom Size {}", size);
139 }
140}
141
142void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
143 switch (size) {
144 case AtomSize::U32:
145 case AtomSize::S32:
146 case AtomSize::F16x2:
147 return v.X(dest_reg, IR::U32{result});
148 case AtomSize::U64:
149 case AtomSize::S64:
150 return v.L(dest_reg, IR::U64{result});
151 case AtomSize::F32:
152 return v.F(dest_reg, IR::F32{result});
153 default:
154 break;
155 }
156}
157
158IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
159 AtomSize size, AtomOp op) {
160 switch (size) {
161 case AtomSize::U32:
162 case AtomSize::S32:
163 return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
164 case AtomSize::U64:
165 case AtomSize::S64:
166 return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
167 case AtomSize::F32:
168 return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
169 case AtomSize::F16x2: {
170 return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
171 }
172 default:
173 throw NotImplementedException("Atom Size {}", size);
174 }
175}
176
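// Applies the atomic operation and optionally writes the value it returns to the destination
// register. When the op/size combination is not applicable, the memory is simply loaded instead,
// so the destination still receives defined data.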
177void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
178 const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
179 IR::Value result;
180 if (AtomOpNotApplicable(size, op)) {
181 result = LoadGlobal(v.ir, offset, size);
182 } else {
183 result = ApplyAtomOp(v, operand_reg, offset, size, op);
184 }
185 if (write_dest) {
186 StoreResult(v, dest_reg, result, size);
187 }
188}
189} // Anonymous namespace
190
191void TranslatorVisitor::ATOM(u64 insn) {
192 union {
193 u64 raw;
194 BitField<0, 8, IR::Reg> dest_reg;
195 BitField<20, 8, IR::Reg> operand_reg;
196 BitField<49, 3, AtomSize> size;
197 BitField<52, 4, AtomOp> op;
198 } const atom{insn};
199 const IR::U64 offset{AtomOffset(*this, insn)};
200 GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
201}
202
203void TranslatorVisitor::RED(u64 insn) {
204 union {
205 u64 raw;
206 BitField<0, 8, IR::Reg> operand_reg;
207 BitField<20, 3, AtomSize> size;
208 BitField<23, 3, AtomOp> op;
209 } const red{insn};
210 const IR::U64 offset{AtomOffset(*this, insn)};
211 GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21};
22
23enum class AtomsSize : u64 {
24 U32,
25 S32,
26 U64,
27};
28
29IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
30 bool is_signed) {
31 switch (op) {
32 case AtomOp::ADD:
33 return ir.SharedAtomicIAdd(offset, op_b);
34 case AtomOp::MIN:
35 return ir.SharedAtomicIMin(offset, op_b, is_signed);
36 case AtomOp::MAX:
37 return ir.SharedAtomicIMax(offset, op_b, is_signed);
38 case AtomOp::INC:
39 return ir.SharedAtomicInc(offset, op_b);
40 case AtomOp::DEC:
41 return ir.SharedAtomicDec(offset, op_b);
42 case AtomOp::AND:
43 return ir.SharedAtomicAnd(offset, op_b);
44 case AtomOp::OR:
45 return ir.SharedAtomicOr(offset, op_b);
46 case AtomOp::XOR:
47 return ir.SharedAtomicXor(offset, op_b);
48 case AtomOp::EXCH:
49 return ir.SharedAtomicExchange(offset, op_b);
50 default:
51 throw NotImplementedException("Integer Atoms Operation {}", op);
52 }
53}
54
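// Computes the shared memory offset for ATOMS. The encoded immediate appears to be in 4-byte
// units (hence the << 2); with RZ it is used as an absolute offset, otherwise it is added to the
// offset register.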
55IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
56 union {
57 u64 raw;
58 BitField<8, 8, IR::Reg> offset_reg;
59 BitField<30, 22, u64> absolute_offset;
60 BitField<30, 22, s64> relative_offset;
61 } const encoding{insn};
62
63 if (encoding.offset_reg == IR::Reg::RZ) {
64 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
65 } else {
66 const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
67 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
68 }
69}
70
71void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
72 switch (size) {
73 case AtomsSize::U32:
74 case AtomsSize::S32:
75 return v.X(dest_reg, IR::U32{result});
76 case AtomsSize::U64:
77 return v.L(dest_reg, IR::U64{result});
78 default:
79 break;
80 }
81}
82} // Anonymous namespace
83
84void TranslatorVisitor::ATOMS(u64 insn) {
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<8, 8, IR::Reg> addr_reg;
89 BitField<20, 8, IR::Reg> src_reg_b;
90 BitField<28, 2, AtomsSize> size;
91 BitField<52, 4, AtomOp> op;
92 } const atoms{insn};
93
94 const bool size_64{atoms.size == AtomsSize::U64};
95 if (size_64 && atoms.op != AtomOp::EXCH) {
96 throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
97 }
98 const bool is_signed{atoms.size == AtomsSize::S32};
99 const IR::U32 offset{AtomsOffset(*this, insn)};
100
101 IR::Value result;
102 if (size_64) {
103 result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
104 } else {
105 result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
106 }
107 StoreResult(*this, atoms.dest_reg, result, atoms.size);
108}
109
110} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12enum class BitSize : u64 {
13 B32,
14 B64,
15 B96,
16 B128,
17};
18
19void TranslatorVisitor::AL2P(u64 inst) {
20 union {
21 u64 raw;
22 BitField<0, 8, IR::Reg> result_register;
23 BitField<8, 8, IR::Reg> indexing_register;
24 BitField<20, 11, s64> offset;
25 BitField<47, 2, BitSize> bitsize;
26 } al2p{inst};
27 if (al2p.bitsize != BitSize::B32) {
28 throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
29 }
30 const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
31 const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
32 X(al2p.result_register, result);
33}
34
35} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13// The scope names seem to follow CUDA terminology.
14enum class LocalScope : u64 {
15 CTA,
16 GL,
17 SYS,
18 VC,
19};
20} // Anonymous namespace
21
22void TranslatorVisitor::MEMBAR(u64 inst) {
23 union {
24 u64 raw;
25 BitField<8, 2, LocalScope> scope;
26 } const membar{inst};
27
28 if (membar.scope == LocalScope::CTA) {
29 ir.WorkgroupMemoryBarrier();
30 } else {
31 ir.DeviceMemoryBarrier();
32 }
33}
34
35void TranslatorVisitor::DEPBAR() {
36 // DEPBAR is a no-op
37}
38
39void TranslatorVisitor::BAR(u64 insn) {
40 enum class Mode {
41 RedPopc,
42 Scan,
43 RedAnd,
44 RedOr,
45 Sync,
46 Arrive,
47 };
48 union {
49 u64 raw;
50 BitField<43, 1, u64> is_a_imm;
51 BitField<44, 1, u64> is_b_imm;
52 BitField<8, 8, u64> imm_a;
53 BitField<20, 12, u64> imm_b;
54 BitField<42, 1, u64> neg_pred;
55 BitField<39, 3, IR::Pred> pred;
56 } const bar{insn};
57
58 const Mode mode{[insn] {
59 switch (insn & 0x0000009B00000000ULL) {
60 case 0x0000000200000000ULL:
61 return Mode::RedPopc;
62 case 0x0000000300000000ULL:
63 return Mode::Scan;
64 case 0x0000000A00000000ULL:
65 return Mode::RedAnd;
66 case 0x0000001200000000ULL:
67 return Mode::RedOr;
68 case 0x0000008000000000ULL:
69 return Mode::Sync;
70 case 0x0000008100000000ULL:
71 return Mode::Arrive;
72 }
73 throw NotImplementedException("Invalid encoding");
74 }()};
75 if (mode != Mode::Sync) {
76 throw NotImplementedException("BAR mode {}", mode);
77 }
78 if (bar.is_a_imm == 0) {
79 throw NotImplementedException("Non-immediate input A");
80 }
81 if (bar.imm_a != 0) {
82 throw NotImplementedException("Non-zero input A");
83 }
84 if (bar.is_b_imm == 0) {
85 throw NotImplementedException("Non-immediate input B");
86 }
87 if (bar.imm_b != 0) {
88 throw NotImplementedException("Non-zero input B");
89 }
90 if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
91 throw NotImplementedException("Non-true input predicate");
92 }
93 ir.Barrier();
94}
95
96} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
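// Extracts a bit field whose offset (bits 0-7) and count (bits 8-15) are packed into src.
// Edge cases handled below: a zero count yields zero, a signed extract with an offset of 32 or
// more replicates the sign bit across the result, and a signed extract reaching past bit 31
// copies the source's bit 31 into bit 31 of the result.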
11void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> offset_reg;
16 BitField<40, 1, u64> brev;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const bfe{insn};
20
21 const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
22 const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
23
24 // Common constants
25 const IR::U32 zero{v.ir.Imm32(0)};
26 const IR::U32 one{v.ir.Imm32(1)};
27 const IR::U32 max_size{v.ir.Imm32(32)};
28 // Edge case conditions
29 const IR::U1 zero_count{v.ir.IEqual(count, zero)};
30 const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
31 const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
32
33 IR::U32 base{v.X(bfe.offset_reg)};
34 if (bfe.brev != 0) {
35 base = v.ir.BitReverse(base);
36 }
37 IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
38 if (bfe.is_signed != 0) {
39 const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
40 const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
41 const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
42 // Replicate condition
43 result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
44 // Exceeding condition
45 const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
46 result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
47 }
48 // Zero count condition
49 result = IR::U32{v.ir.Select(zero_count, zero, result)};
50
51 v.X(bfe.dest_reg, result);
52
53 if (bfe.cc != 0) {
54 v.SetZFlag(v.ir.IEqual(result, zero));
55 v.SetSFlag(v.ir.ILessThan(result, zero, true));
56 v.ResetCFlag();
57 v.ResetOFlag();
58 }
59}
60} // Anonymous namespace
61
62void TranslatorVisitor::BFE_reg(u64 insn) {
63 BFE(*this, insn, GetReg20(insn));
64}
65
66void TranslatorVisitor::BFE_cbuf(u64 insn) {
67 BFE(*this, insn, GetCbuf(insn));
68}
69
70void TranslatorVisitor::BFE_imm(u64 insn) {
71 BFE(*this, insn, GetImm20(insn));
72}
73
74} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
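// Inserts a bit field from the insert register into base. src_a packs the offset (bits 0-7) and
// count (bits 8-15); a count larger than 32 is replaced by the bits remaining after the offset,
// and an offset of 32 or more leaves base unchanged.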
11void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> insert_reg;
16 BitField<47, 1, u64> cc;
17 } const bfi{insn};
18
19 const IR::U32 zero{v.ir.Imm32(0)};
20 const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
21 const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
22 const IR::U32 max_size{v.ir.Imm32(32)};
23
24 // Edge case conditions
25 const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
26 const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
27
28 const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
29 const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
30
31 const IR::U32 insert{v.X(bfi.insert_reg)};
32 IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
33
34 result = IR::U32{v.ir.Select(exceed_offset, base, result)};
35
36 v.X(bfi.dest_reg, result);
37 if (bfi.cc != 0) {
38 v.SetZFlag(v.ir.IEqual(result, zero));
39 v.SetSFlag(v.ir.ILessThan(result, zero, true));
40 v.ResetCFlag();
41 v.ResetOFlag();
42 }
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::BFI_reg(u64 insn) {
47 BFI(*this, insn, GetReg20(insn), GetReg39(insn));
48}
49
50void TranslatorVisitor::BFI_rc(u64 insn) {
51 BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
52}
53
54void TranslatorVisitor::BFI_cr(u64 insn) {
55 BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
56}
57
58void TranslatorVisitor::BFI_imm(u64 insn) {
59 BFI(*this, insn, GetImm20(insn), GetReg39(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void Check(u64 insn) {
13 union {
14 u64 raw;
15 BitField<5, 1, u64> cbuf_mode;
16 BitField<6, 1, u64> lmt;
17 } const encoding{insn};
18
19 if (encoding.cbuf_mode != 0) {
20 throw NotImplementedException("Constant buffer mode");
21 }
22 if (encoding.lmt != 0) {
23 throw NotImplementedException("LMT");
24 }
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::BRX(u64 insn) {
29 Check(insn);
30}
31
32void TranslatorVisitor::JMX(u64 insn) {
33 Check(insn);
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
11
12namespace Shader::Maxwell {
13
14enum class FpRounding : u64 {
15 RN,
16 RM,
17 RP,
18 RZ,
19};
20
21enum class FmzMode : u64 {
22 None,
23 FTZ,
24 FMZ,
25 INVALIDFMZ3,
26};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 // FMZ is manually handled in the instruction
50 return IR::FmzMode::FTZ;
51 case FmzMode::INVALIDFMZ3:
52 break;
53 }
54 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
55}
56
57} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
6
7namespace Shader::Maxwell {
8IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
9 CompareOp compare_op, bool is_signed) {
10 switch (compare_op) {
11 case CompareOp::False:
12 return ir.Imm1(false);
13 case CompareOp::LessThan:
14 return ir.ILessThan(operand_1, operand_2, is_signed);
15 case CompareOp::Equal:
16 return ir.IEqual(operand_1, operand_2);
17 case CompareOp::LessThanEqual:
18 return ir.ILessThanEqual(operand_1, operand_2, is_signed);
19 case CompareOp::GreaterThan:
20 return ir.IGreaterThan(operand_1, operand_2, is_signed);
21 case CompareOp::NotEqual:
22 return ir.INotEqual(operand_1, operand_2);
23 case CompareOp::GreaterThanEqual:
24 return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
25 case CompareOp::True:
26 return ir.Imm1(true);
27 default:
28 throw NotImplementedException("Invalid compare op {}", compare_op);
29 }
30}
31
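// Extended compare, presumably for the .X instruction variants: the subtraction folds in the
// carry flag from a previous operation and its outcome is combined with the previous zero flag,
// so comparisons wider than 32 bits can be chained across instructions.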
32IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
33 CompareOp compare_op, bool is_signed) {
34 const IR::U32 zero{ir.Imm32(0)};
35 const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
36 const IR::U1 z_flag{ir.GetZFlag()};
37 const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
38 const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
39 : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
40 ir.ILessThan(operand_2, zero, true))};
41 switch (compare_op) {
42 case CompareOp::False:
43 return ir.Imm1(false);
44 case CompareOp::LessThan:
45 return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
46 ir.ILessThan(intermediate, zero, true))};
47 case CompareOp::Equal:
48 return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
49 case CompareOp::LessThanEqual: {
50 const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
51 ir.ILessThan(intermediate, zero, true))};
52 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
53 }
54 case CompareOp::GreaterThan: {
55 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
56 ir.IGreaterThan(intermediate, zero, true))};
57 const IR::U1 not_z{ir.LogicalNot(z_flag)};
58 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
59 }
60 case CompareOp::NotEqual:
61 return ir.LogicalOr(ir.INotEqual(intermediate, zero),
62 ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
63 case CompareOp::GreaterThanEqual: {
64 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
65 ir.IGreaterThanEqual(intermediate, zero, true))};
66 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
67 }
68 case CompareOp::True:
69 return ir.Imm1(true);
70 default:
71 throw NotImplementedException("Invalid compare op {}", compare_op);
72 }
73}
74
75IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
76 BooleanOp bop) {
77 switch (bop) {
78 case BooleanOp::AND:
79 return ir.LogicalAnd(predicate_1, predicate_2);
80 case BooleanOp::OR:
81 return ir.LogicalOr(predicate_1, predicate_2);
82 case BooleanOp::XOR:
83 return ir.LogicalXor(predicate_1, predicate_2);
84 default:
85 throw NotImplementedException("Invalid bop {}", bop);
86 }
87}
88
89IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
90 switch (op) {
91 case PredicateOp::False:
92 return ir.Imm1(false);
93 case PredicateOp::True:
94 return ir.Imm1(true);
95 case PredicateOp::Zero:
96 return ir.IEqual(result, ir.Imm32(0));
97 case PredicateOp::NonZero:
98 return ir.INotEqual(result, ir.Imm32(0));
99 default:
100 throw NotImplementedException("Invalid Predicate operation {}", op);
101 }
102}
103
104bool IsCompareOpOrdered(FPCompareOp op) {
105 switch (op) {
106 case FPCompareOp::LTU:
107 case FPCompareOp::EQU:
108 case FPCompareOp::LEU:
109 case FPCompareOp::GTU:
110 case FPCompareOp::NEU:
111 case FPCompareOp::GEU:
112 return false;
113 default:
114 return true;
115 }
116}
117
118IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
119 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
120 IR::FpControl control) {
121 const bool ordered{IsCompareOpOrdered(compare_op)};
122 switch (compare_op) {
123 case FPCompareOp::F:
124 return ir.Imm1(false);
125 case FPCompareOp::LT:
126 case FPCompareOp::LTU:
127 return ir.FPLessThan(operand_1, operand_2, control, ordered);
128 case FPCompareOp::EQ:
129 case FPCompareOp::EQU:
130 return ir.FPEqual(operand_1, operand_2, control, ordered);
131 case FPCompareOp::LE:
132 case FPCompareOp::LEU:
133 return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
134 case FPCompareOp::GT:
135 case FPCompareOp::GTU:
136 return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
137 case FPCompareOp::NE:
138 case FPCompareOp::NEU:
139 return ir.FPNotEqual(operand_1, operand_2, control, ordered);
140 case FPCompareOp::GE:
141 case FPCompareOp::GEU:
142 return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
143 case FPCompareOp::NUM:
144 return ir.FPOrdered(operand_1, operand_2);
145 case FPCompareOp::Nan:
146 return ir.FPUnordered(operand_1, operand_2);
147 case FPCompareOp::T:
148 return ir.Imm1(true);
149 default:
150 throw NotImplementedException("Invalid FP compare op {}", compare_op);
151 }
152}
153} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
12 const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
13
14[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
15 const IR::U32& operand_2, CompareOp compare_op,
16 bool is_signed);
17
18[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
19 const IR::U1& predicate_2, BooleanOp bop);
20
21[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
22
23[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
24
25[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
26 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
27 IR::FpControl control = {});
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
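// CSET writes an all-ones mask (or 1.0f when BF is set) if the combination of the flow test and
// the boolean-op predicate passes, and zero otherwise.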
12void TranslatorVisitor::CSET(u64 insn) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 5, IR::FlowTest> cc_test;
17 BitField<39, 3, IR::Pred> bop_pred;
18 BitField<42, 1, u64> neg_bop_pred;
19 BitField<44, 1, u64> bf;
20 BitField<45, 2, BooleanOp> bop;
21 BitField<47, 1, u64> cc;
22 } const cset{insn};
23
24 const IR::U32 one_mask{ir.Imm32(-1)};
25 const IR::U32 fp_one{ir.Imm32(0x3f800000)};
26 const IR::U32 zero{ir.Imm32(0)};
27 const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
28 const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
29 const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
30 const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
31 const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
32 X(cset.dest_reg, result);
33 if (cset.cc != 0) {
34 const IR::U1 is_zero{ir.IEqual(result, zero)};
35 SetZFlag(is_zero);
36 if (cset.bf != 0) {
37 ResetSFlag();
38 } else {
39 SetSFlag(ir.LogicalNot(is_zero));
40 }
41 ResetOFlag();
42 ResetCFlag();
43 }
44}
45
46void TranslatorVisitor::CSETP(u64 insn) {
47 union {
48 u64 raw;
49 BitField<0, 3, IR::Pred> dest_pred_b;
50 BitField<3, 3, IR::Pred> dest_pred_a;
51 BitField<8, 5, IR::FlowTest> cc_test;
52 BitField<39, 3, IR::Pred> bop_pred;
53 BitField<42, 1, u64> neg_bop_pred;
54 BitField<45, 2, BooleanOp> bop;
55 } const csetp{insn};
56
57 const BooleanOp bop{csetp.bop};
58 const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
59 const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
60 const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
61 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
62 ir.SetPred(csetp.dest_pred_a, result_a);
63 ir.SetPred(csetp.dest_pred_b, result_b);
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<45, 1, u64> neg_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> neg_a;
23 BitField<49, 1, u64> abs_b;
24 } const dadd{insn};
25 if (dadd.cc != 0) {
26 throw NotImplementedException("DADD CC");
27 }
28
29 const IR::F64 src_a{v.D(dadd.src_a_reg)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
32
33 const IR::FpControl control{
34 .no_contraction = true,
35 .rounding = CastFpRounding(dadd.fp_rounding),
36 .fmz_mode = IR::FmzMode::None,
37 };
38
39 v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DADD_reg(u64 insn) {
44 DADD(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DADD_cbuf(u64 insn) {
48 DADD(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DADD_imm(u64 insn) {
52 DADD(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 } const dset{insn};
28
29 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
30 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
31
32 IR::U1 pred{v.ir.GetPred(dset.pred)};
33 if (dset.neg_pred != 0) {
34 pred = v.ir.LogicalNot(pred);
35 }
36 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
37 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
38
39 const IR::U32 one_mask{v.ir.Imm32(-1)};
40 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
41 const IR::U32 zero{v.ir.Imm32(0)};
42 const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
43 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
44
45 v.X(dset.dest_reg, result);
46 if (dset.cc != 0) {
47 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
48 v.SetZFlag(is_zero);
49 if (dset.bf != 0) {
50 v.ResetSFlag();
51 } else {
52 v.SetSFlag(v.ir.LogicalNot(is_zero));
53 }
54 v.ResetCFlag();
55 v.ResetOFlag();
56 }
57}
58} // Anonymous namespace
59
60void TranslatorVisitor::DSET_reg(u64 insn) {
61 DSET(*this, insn, GetDoubleReg20(insn));
62}
63
64void TranslatorVisitor::DSET_cbuf(u64 insn) {
65 DSET(*this, insn, GetDoubleCbuf(insn));
66}
67
68void TranslatorVisitor::DSET_imm(u64 insn) {
69 DSET(*this, insn, GetDoubleImm20(insn));
70}
71
72} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<50, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg_b;
21 BitField<49, 1, u64> neg_c;
22 } const dfma{insn};
23
24 if (dfma.cc != 0) {
25 throw NotImplementedException("DFMA CC");
26 }
27
28 const IR::F64 src_a{v.D(dfma.src_a_reg)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
30 const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
31
32 const IR::FpControl control{
33 .no_contraction = true,
34 .rounding = CastFpRounding(dfma.fp_rounding),
35 .fmz_mode = IR::FmzMode::None,
36 };
37
38 v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DFMA_reg(u64 insn) {
43 DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
44}
45
46void TranslatorVisitor::DFMA_cr(u64 insn) {
47 DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
48}
49
50void TranslatorVisitor::DFMA_rc(u64 insn) {
51 DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
52}
53
54void TranslatorVisitor::DFMA_imm(u64 insn) {
55 DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<45, 1, u64> negate_b;
19 BitField<46, 1, u64> abs_a;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> negate_a;
22 BitField<49, 1, u64> abs_b;
23 } const dmnmx{insn};
24
25 if (dmnmx.cc != 0) {
26 throw NotImplementedException("DMNMX CC");
27 }
28
29 const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
32
33 IR::F64 max{v.ir.FPMax(op_a, op_b)};
34 IR::F64 min{v.ir.FPMin(op_a, op_b)};
35
36 if (dmnmx.neg_pred != 0) {
37 std::swap(min, max);
38 }
39 v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DMNMX_reg(u64 insn) {
44 DMNMX(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
48 DMNMX(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DMNMX_imm(u64 insn) {
52 DMNMX(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg;
21 } const dmul{insn};
22
23 if (dmul.cc != 0) {
24 throw NotImplementedException("DMUL CC");
25 }
26
27 const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
28 const IR::FpControl control{
29 .no_contraction = true,
30 .rounding = CastFpRounding(dmul.fp_rounding),
31 .fmz_mode = IR::FmzMode::None,
32 };
33
34 v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
35}
36} // Anonymous namespace
37
38void TranslatorVisitor::DMUL_reg(u64 insn) {
39 DMUL(*this, insn, GetDoubleReg20(insn));
40}
41
42void TranslatorVisitor::DMUL_cbuf(u64 insn) {
43 DMUL(*this, insn, GetDoubleCbuf(insn));
44}
45
46void TranslatorVisitor::DMUL_imm(u64 insn) {
47 DMUL(*this, insn, GetDoubleImm20(insn));
48}
49
50} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<48, 4, FPCompareOp> compare_op;
26 } const dsetp{insn};
27
28 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
30
31 const BooleanOp bop{dsetp.bop};
32 const FPCompareOp compare_op{dsetp.compare_op};
33 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
34 const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
35 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
36 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
37 v.ir.SetPred(dsetp.dest_pred_a, result_a);
38 v.ir.SetPred(dsetp.dest_pred_b, result_b);
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DSETP_reg(u64 insn) {
43 DSETP(*this, insn, GetDoubleReg20(insn));
44}
45
46void TranslatorVisitor::DSETP_cbuf(u64 insn) {
47 DSETP(*this, insn, GetDoubleCbuf(insn));
48}
49
50void TranslatorVisitor::DSETP_imm(u64 insn) {
51 DSETP(*this, insn, GetDoubleImm20(insn));
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ExitFragment(TranslatorVisitor& v) {
12 const ProgramHeader sph{v.env.SPH()};
13 IR::Reg src_reg{IR::Reg::R0};
14 for (u32 render_target = 0; render_target < 8; ++render_target) {
15 const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
16 for (u32 component = 0; component < 4; ++component) {
17 if (!mask[component]) {
18 continue;
19 }
20 v.ir.SetFragColor(render_target, component, v.F(src_reg));
21 ++src_reg;
22 }
23 }
24 if (sph.ps.omap.sample_mask != 0) {
25 v.ir.SetSampleMask(v.X(src_reg));
26 }
27 if (sph.ps.omap.depth != 0) {
28 v.ir.SetFragDepth(v.F(src_reg + 1));
29 }
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::EXIT() {
34 switch (env.ShaderStage()) {
35 case Stage::Fragment:
36 ExitFragment(*this);
37 break;
38 default:
39 break;
40 }
41}
42
43} // namespace Shader::Maxwell
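
ExitFragment above reads fragment outputs from consecutive registers starting at R0: one register per enabled render-target component, then the sample mask (when the header enables it), with fragment depth taken from the register after that. A small model of the register layout, in plain C++ with an assumed helper name:

    #include <array>
    #include <cstdint>

    // Counts the registers consumed by enabled colour components; the sample mask,
    // when present, is read from this index and fragment depth from index + 1.
    std::uint32_t ColorRegisterCount(const std::array<std::array<bool, 4>, 8>& enabled) {
        std::uint32_t count = 0;
        for (const auto& render_target : enabled) {
            for (const bool component : render_target) {
                count += component ? 1u : 0u;
            }
        }
        return count;
    }
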
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 BitField<41, 1, u64> shift;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const flo{insn};
20
21 if (flo.cc != 0) {
22 throw NotImplementedException("CC");
23 }
24 if (flo.tilde != 0) {
25 src = v.ir.BitwiseNot(src);
26 }
27 IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
28 if (flo.shift != 0) {
29 const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
30 result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
31 }
32 v.X(flo.dest_reg, result);
33}
34} // Anonymous namespace
35
36void TranslatorVisitor::FLO_reg(u64 insn) {
37 FLO(*this, insn, GetReg20(insn));
38}
39
40void TranslatorVisitor::FLO_cbuf(u64 insn) {
41 FLO(*this, insn, GetCbuf(insn));
42}
43
44void TranslatorVisitor::FLO_imm(u64 insn) {
45 FLO(*this, insn, GetImm20(insn));
46}
47} // namespace Shader::Maxwell
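
The shift modifier handled above converts the most-significant-set-bit index into a count from the top of the word: for indices in [0, 31], XOR with 31 equals 31 - index, while the -1 "not found" result is forwarded unchanged. The same logic as a standalone C++ function:

    #include <cstdint>

    // msb_index is the FindUMsb/FindSMsb result; -1 means no bit was found.
    std::int32_t ShiftedFindLeadingOne(std::int32_t msb_index) {
        if (msb_index < 0) {
            return msb_index; // not found: keep -1
        }
        return msb_index ^ 31; // equivalent to 31 - msb_index for 0..31
    }
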
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
13 const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const fadd{insn};
19
20 if (cc) {
21 throw NotImplementedException("FADD CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
25 IR::FpControl control{
26 .no_contraction = true,
27 .rounding = CastFpRounding(fp_rounding),
28 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
29 };
30 IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
31 if (sat) {
32 value = v.ir.FPSaturate(value);
33 }
34 v.F(fadd.dest_reg, value);
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetFloatReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64 insn) {
60 FADD(*this, insn, GetFloatCbuf(insn));
61}
62
63void TranslatorVisitor::FADD_imm(u64 insn) {
64 FADD(*this, insn, GetFloatImm20(insn));
65}
66
67void TranslatorVisitor::FADD32I(u64 insn) {
68 union {
69 u64 raw;
70 BitField<55, 1, u64> ftz;
71 BitField<56, 1, u64> neg_a;
72 BitField<54, 1, u64> abs_a;
73 BitField<52, 1, u64> cc;
74 BitField<53, 1, u64> neg_b;
75 BitField<57, 1, u64> abs_b;
76 } const fadd32i{insn};
77
78 FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
79 fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<47, 1, u64> ftz;
18 BitField<48, 4, FPCompareOp> compare_op;
19 } const fcmp{insn};
20
21 const IR::F32 zero{v.ir.Imm32(0.0f)};
22 const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
23 const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
24 const IR::U32 src_reg{v.X(fcmp.src_reg)};
25 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
26
27 v.X(fcmp.dest_reg, result);
28}
29} // Anonymous namespace
30
31void TranslatorVisitor::FCMP_reg(u64 insn) {
32 FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
33}
34
35void TranslatorVisitor::FCMP_rc(u64 insn) {
36 FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
37}
38
39void TranslatorVisitor::FCMP_cr(u64 insn) {
40 FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
41}
42
43void TranslatorVisitor::FCMP_imm(u64 insn) {
44 union {
45 u64 raw;
46 BitField<20, 19, u64> value;
47 BitField<56, 1, u64> is_negative;
48 } const fcmp{insn};
49 const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
50 const u32 value{static_cast<u32>(fcmp.value) << 12};
51
52 FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
53}
54
55} // namespace Shader::Maxwell
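
FCMP_imm above rebuilds a 32-bit float from a 19-bit immediate: shifting the field left by 12 places it over the exponent and upper mantissa bits (bits 12..30), and bit 56 of the instruction supplies the sign. A standalone decoder with the same arithmetic, in plain C++ with an illustrative name:

    #include <cstdint>
    #include <cstring>

    // Rebuilds the f32 encoded by the 19-bit FCMP immediate and its sign bit.
    float DecodeFcmpImmediate(std::uint32_t imm19, bool negative) {
        const std::uint32_t bits = (imm19 << 12) | (negative ? 0x80000000u : 0u);
        float value;
        std::memcpy(&value, &bits, sizeof(value));
        return value; // e.g. imm19 = 0x3f800 decodes to 1.0f
    }
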
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 BitField<55, 1, u64> ftz;
28 } const fset{insn};
29
30 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
31 const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
32 const IR::FpControl control{
33 .no_contraction = false,
34 .rounding = IR::FpRounding::DontCare,
35 .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
36 };
37
38 IR::U1 pred{v.ir.GetPred(fset.pred)};
39 if (fset.neg_pred != 0) {
40 pred = v.ir.LogicalNot(pred);
41 }
42 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 zero{v.ir.Imm32(0)};
48 const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
49 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
50
51 v.X(fset.dest_reg, result);
52 if (fset.cc != 0) {
53 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
54 v.SetZFlag(is_zero);
55 if (fset.bf != 0) {
56 v.ResetSFlag();
57 } else {
58 v.SetSFlag(v.ir.LogicalNot(is_zero));
59 }
60 v.ResetCFlag();
61 v.ResetOFlag();
62 }
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::FSET_reg(u64 insn) {
67 FSET(*this, insn, GetFloatReg20(insn));
68}
69
70void TranslatorVisitor::FSET_cbuf(u64 insn) {
71 FSET(*this, insn, GetFloatCbuf(insn));
72}
73
74void TranslatorVisitor::FSET_imm(u64 insn) {
75 FSET(*this, insn, GetFloatImm20(insn));
76}
77
78} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
7
8namespace Shader::Maxwell {
9namespace {
10enum class FloatFormat : u64 {
11 F16 = 1,
12 F32 = 2,
13 F64 = 3,
14};
15
16enum class RoundingOp : u64 {
17 None = 0,
18 Pass = 3,
19 Round = 8,
20 Floor = 9,
21 Ceil = 10,
22 Trunc = 11,
23};
24
25[[nodiscard]] u32 WidthSize(FloatFormat width) {
26 switch (width) {
27 case FloatFormat::F16:
28 return 16;
29 case FloatFormat::F32:
30 return 32;
31 case FloatFormat::F64:
32 return 64;
33 default:
34 throw NotImplementedException("Invalid width {}", width);
35 }
36}
37
38void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
39 union {
40 u64 insn;
41 BitField<0, 8, IR::Reg> dest_reg;
42 BitField<44, 1, u64> ftz;
43 BitField<45, 1, u64> neg;
44 BitField<47, 1, u64> cc;
45 BitField<50, 1, u64> sat;
46 BitField<39, 4, u64> rounding_op;
47 BitField<39, 2, FpRounding> rounding;
48 BitField<10, 2, FloatFormat> src_size;
49 BitField<8, 2, FloatFormat> dst_size;
50
51 [[nodiscard]] RoundingOp RoundingOperation() const {
52 constexpr u64 rounding_mask = 0x0B;
53 return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
54 }
55 } const f2f{insn};
56
57 if (f2f.cc != 0) {
58 throw NotImplementedException("F2F CC");
59 }
60
61 IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
62
63 const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
64 IR::FpControl fp_control{
65 .no_contraction = false,
66 .rounding = IR::FpRounding::DontCare,
67 .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
68 };
69 if (f2f.src_size != f2f.dst_size) {
70 fp_control.rounding = CastFpRounding(f2f.rounding);
71 input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
72 } else {
73 switch (f2f.RoundingOperation()) {
74 case RoundingOp::None:
75 case RoundingOp::Pass:
76            // Make sure NaNs are handled properly
77 switch (f2f.src_size) {
78 case FloatFormat::F16:
79 input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
80 break;
81 case FloatFormat::F32:
82 input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
83 break;
84 case FloatFormat::F64:
85 input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
86 break;
87 }
88 break;
89 case RoundingOp::Round:
90 input = v.ir.FPRoundEven(input, fp_control);
91 break;
92 case RoundingOp::Floor:
93 input = v.ir.FPFloor(input, fp_control);
94 break;
95 case RoundingOp::Ceil:
96 input = v.ir.FPCeil(input, fp_control);
97 break;
98 case RoundingOp::Trunc:
99 input = v.ir.FPTrunc(input, fp_control);
100 break;
101 default:
102 throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
103 }
104 }
105 if (f2f.sat != 0 && !any_fp64) {
106 input = v.ir.FPSaturate(input);
107 }
108
109 switch (f2f.dst_size) {
110 case FloatFormat::F16: {
111 const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
112 v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
113 break;
114 }
115 case FloatFormat::F32:
116 v.F(f2f.dest_reg, input);
117 break;
118 case FloatFormat::F64:
119 v.D(f2f.dest_reg, input);
120 break;
121 default:
122 throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
123 }
124}
125} // Anonymous namespace
126
127void TranslatorVisitor::F2F_reg(u64 insn) {
128 union {
129 u64 insn;
130 BitField<49, 1, u64> abs;
131 BitField<10, 2, FloatFormat> src_size;
132 BitField<41, 1, u64> selector;
133 } const f2f{insn};
134
135 IR::F16F32F64 src_a;
136 switch (f2f.src_size) {
137 case FloatFormat::F16: {
138 auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
139 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
140 break;
141 }
142 case FloatFormat::F32:
143 src_a = GetFloatReg20(insn);
144 break;
145 case FloatFormat::F64:
146 src_a = GetDoubleReg20(insn);
147 break;
148 default:
149        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
150 }
151 F2F(*this, insn, src_a, f2f.abs != 0);
152}
153
154void TranslatorVisitor::F2F_cbuf(u64 insn) {
155 union {
156 u64 insn;
157 BitField<49, 1, u64> abs;
158 BitField<10, 2, FloatFormat> src_size;
159 BitField<41, 1, u64> selector;
160 } const f2f{insn};
161
162 IR::F16F32F64 src_a;
163 switch (f2f.src_size) {
164 case FloatFormat::F16: {
165 auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
166 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
167 break;
168 }
169 case FloatFormat::F32:
170 src_a = GetFloatCbuf(insn);
171 break;
172 case FloatFormat::F64:
173 src_a = GetDoubleCbuf(insn);
174 break;
175 default:
176        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
177 }
178 F2F(*this, insn, src_a, f2f.abs != 0);
179}
180
181void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
182 union {
183 u64 insn;
184 BitField<49, 1, u64> abs;
185 BitField<10, 2, FloatFormat> src_size;
186 BitField<41, 1, u64> selector;
187 BitField<20, 19, u64> imm;
188 BitField<56, 1, u64> imm_neg;
189 } const f2f{insn};
190
191 IR::F16F32F64 src_a;
192 switch (f2f.src_size) {
193 case FloatFormat::F16: {
194 const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
195 const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
196 src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
197 if (f2f.imm_neg != 0) {
198 throw NotImplementedException("Neg bit on F16");
199 }
200 break;
201 }
202 case FloatFormat::F32:
203 src_a = GetFloatImm20(insn);
204 break;
205 case FloatFormat::F64:
206 src_a = GetDoubleImm20(insn);
207 break;
208 default:
209        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
210 }
211 F2F(*this, insn, src_a, f2f.abs != 0);
212}
213
214} // namespace Shader::Maxwell
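
In the size-preserving path above, F2F still adds +0.0 under the requested FpControl; this is not a no-op, since it routes the value through an FP operation where flush-to-zero applies and signaling NaNs are quieted. A host-side illustration of the NaN-quieting effect on typical IEEE-754 hardware (illustrative only, unrelated to the IR emitter):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        const std::uint32_t snan_bits = 0x7f800001u; // signaling NaN bit pattern
        float x;
        std::memcpy(&x, &snan_bits, sizeof(x));

        const float y = x + 0.0f; // the addition returns a quiet NaN

        std::uint32_t out;
        std::memcpy(&out, &y, sizeof(out));
        std::printf("in=%08x out=%08x\n", static_cast<unsigned>(snan_bits),
                    static_cast<unsigned>(out)); // out has the quiet bit (0x00400000) set
    }
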
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class DestFormat : u64 {
15 Invalid,
16 I16,
17 I32,
18 I64,
19};
20enum class SrcFormat : u64 {
21 Invalid,
22 F16,
23 F32,
24 F64,
25};
26enum class Rounding : u64 {
27 Round,
28 Floor,
29 Ceil,
30 Trunc,
31};
32
33union F2I {
34 u64 raw;
35 BitField<0, 8, IR::Reg> dest_reg;
36 BitField<8, 2, DestFormat> dest_format;
37 BitField<10, 2, SrcFormat> src_format;
38 BitField<12, 1, u64> is_signed;
39 BitField<39, 2, Rounding> rounding;
40 BitField<41, 1, u64> half;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> abs;
43 BitField<47, 1, u64> cc;
44 BitField<49, 1, u64> neg;
45};
46
47size_t BitSize(DestFormat dest_format) {
48 switch (dest_format) {
49 case DestFormat::I16:
50 return 16;
51 case DestFormat::I32:
52 return 32;
53 case DestFormat::I64:
54 return 64;
55 default:
56 throw NotImplementedException("Invalid destination format {}", dest_format);
57 }
58}
59
60std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
61 if (is_signed) {
62 switch (format) {
63 case DestFormat::I16:
64 return {static_cast<f64>(std::numeric_limits<s16>::max()),
65 static_cast<f64>(std::numeric_limits<s16>::min())};
66 case DestFormat::I32:
67 return {static_cast<f64>(std::numeric_limits<s32>::max()),
68 static_cast<f64>(std::numeric_limits<s32>::min())};
69 case DestFormat::I64:
70 return {static_cast<f64>(std::numeric_limits<s64>::max()),
71 static_cast<f64>(std::numeric_limits<s64>::min())};
72 default:
73 break;
74 }
75 } else {
76 switch (format) {
77 case DestFormat::I16:
78 return {static_cast<f64>(std::numeric_limits<u16>::max()),
79 static_cast<f64>(std::numeric_limits<u16>::min())};
80 case DestFormat::I32:
81 return {static_cast<f64>(std::numeric_limits<u32>::max()),
82 static_cast<f64>(std::numeric_limits<u32>::min())};
83 case DestFormat::I64:
84 return {static_cast<f64>(std::numeric_limits<u64>::max()),
85 static_cast<f64>(std::numeric_limits<u64>::min())};
86 default:
87 break;
88 }
89 }
90 throw NotImplementedException("Invalid destination format {}", format);
91}
92
93IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
94 union {
95 u64 raw;
96 BitField<20, 14, s64> offset;
97 BitField<34, 5, u64> binding;
98 } const cbuf{insn};
99 if (cbuf.binding >= 18) {
100 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
101 }
102 if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
103 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
104 }
105 if (cbuf.offset % 2 != 0) {
106 throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
107 }
108 const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
109 const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
110 const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
111 const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
112 return v.ir.PackDouble2x32(vector);
113}
114
115void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
116    // F2I converts a floating-point value to an integer
117 const F2I f2i{insn};
118
119 const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
120 f2i.dest_format != DestFormat::I64};
121 IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
122 if (denorm_cares) {
123 fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
124 }
125 const IR::FpControl fp_control{
126 .no_contraction = true,
127 .rounding = IR::FpRounding::DontCare,
128 .fmz_mode = fmz_mode,
129 };
130 const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
131 const IR::F16F32F64 rounded_value{[&] {
132 switch (f2i.rounding) {
133 case Rounding::Round:
134 return v.ir.FPRoundEven(op_a, fp_control);
135 case Rounding::Floor:
136 return v.ir.FPFloor(op_a, fp_control);
137 case Rounding::Ceil:
138 return v.ir.FPCeil(op_a, fp_control);
139 case Rounding::Trunc:
140 return v.ir.FPTrunc(op_a, fp_control);
141 default:
142 throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
143 }
144 }()};
145 const bool is_signed{f2i.is_signed != 0};
146 const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
147
148 IR::F16F32F64 intermediate;
149 switch (f2i.src_format) {
150 case SrcFormat::F16: {
151 const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
152 const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
153 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
154 break;
155 }
156 case SrcFormat::F32: {
157 const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
158 const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
159 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
160 break;
161 }
162 case SrcFormat::F64: {
163 const IR::F64 max_val{v.ir.Imm64(max_bound)};
164 const IR::F64 min_val{v.ir.Imm64(min_bound)};
165 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
166 break;
167 }
168 default:
169        throw NotImplementedException("Invalid source format {}", f2i.src_format.Value());
170 }
171
172 const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
173 IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
174
175 bool handled_special_case = false;
176 const bool special_nan_cases =
177 (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
178 if (special_nan_cases) {
179 if (f2i.dest_format == DestFormat::I32) {
180 handled_special_case = true;
181 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
182 } else if (f2i.dest_format == DestFormat::I64) {
183 handled_special_case = true;
184 result = IR::U64{
185 v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
186 }
187 }
188 if (!handled_special_case && is_signed) {
189 if (bitsize != 64) {
190 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
191 } else {
192 result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
193 }
194 }
195
196 if (bitsize == 64) {
197 v.L(f2i.dest_reg, result);
198 } else {
199 v.X(f2i.dest_reg, result);
200 }
201
202 if (f2i.cc != 0) {
203 throw NotImplementedException("F2I CC");
204 }
205}
206} // Anonymous namespace
207
208void TranslatorVisitor::F2I_reg(u64 insn) {
209 union {
210 u64 raw;
211 F2I base;
212 BitField<20, 8, IR::Reg> src_reg;
213 } const f2i{insn};
214
215 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
216 switch (f2i.base.src_format) {
217 case SrcFormat::F16:
218 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
219 case SrcFormat::F32:
220 return F(f2i.src_reg);
221 case SrcFormat::F64:
222 return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
223 default:
224 throw NotImplementedException("Invalid F2I source format {}",
225 f2i.base.src_format.Value());
226 }
227 }()};
228 TranslateF2I(*this, insn, op_a);
229}
230
231void TranslatorVisitor::F2I_cbuf(u64 insn) {
232 const F2I f2i{insn};
233 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
234 switch (f2i.src_format) {
235 case SrcFormat::F16:
236 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
237 case SrcFormat::F32:
238 return GetFloatCbuf(insn);
239 case SrcFormat::F64: {
240 return UnpackCbuf(*this, insn);
241 }
242 default:
243 throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
244 }
245 }()};
246 TranslateF2I(*this, insn, op_a);
247}
248
249void TranslatorVisitor::F2I_imm(u64) {
250 throw NotImplementedException("{}", Opcode::F2I_imm);
251}
252
253} // namespace Shader::Maxwell
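
TranslateF2I above follows a round, clamp, convert sequence; NaN inputs become 0x80000000 or 0x8000'0000'0000'0000 when exactly one of the source and destination is 64-bit, and zero on the remaining signed paths. A scalar model of the plain signed F32 -> I32 case, in plain C++ with the rounding step omitted:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // NaN becomes 0; everything else is clamped to the destination range first.
    std::int32_t ConvertF32ToS32(float value) {
        if (std::isnan(value)) {
            return 0;
        }
        const double clamped =
            std::clamp(static_cast<double>(value),
                       static_cast<double>(std::numeric_limits<std::int32_t>::min()),
                       static_cast<double>(std::numeric_limits<std::int32_t>::max()));
        return static_cast<std::int32_t>(clamped);
    }
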
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
13 bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const ffma{insn};
19
20 if (cc) {
21 throw NotImplementedException("FFMA CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
25 const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
26 const IR::FpControl fp_control{
27 .no_contraction = true,
28 .rounding = CastFpRounding(fp_rounding),
29 .fmz_mode = CastFmzMode(fmz_mode),
30 };
31 IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
32 if (fmz_mode == FmzMode::FMZ && !sat) {
33        // Skip the FMZ special case when SAT is enabled; saturation already handles it.
34        // In D3D9 (FMZ) mode, anything * 0 is zero, even NaN and infinity
35 const IR::F32 zero{v.ir.Imm32(0.0f)};
36 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
37 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
38 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
39 value = IR::F32{v.ir.Select(any_zero, op_c, value)};
40 }
41 if (sat) {
42 value = v.ir.FPSaturate(value);
43 }
44 v.F(ffma.dest_reg, value);
45}
46
47void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
48 union {
49 u64 raw;
50 BitField<47, 1, u64> cc;
51 BitField<48, 1, u64> neg_b;
52 BitField<49, 1, u64> neg_c;
53 BitField<50, 1, u64> sat;
54 BitField<51, 2, FpRounding> fp_rounding;
55 BitField<53, 2, FmzMode> fmz_mode;
56 } const ffma{insn};
57
58 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
59 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::FFMA_reg(u64 insn) {
64 FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
65}
66
67void TranslatorVisitor::FFMA_rc(u64 insn) {
68 FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
69}
70
71void TranslatorVisitor::FFMA_cr(u64 insn) {
72 FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
73}
74
75void TranslatorVisitor::FFMA_imm(u64 insn) {
76 FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
77}
78
79void TranslatorVisitor::FFMA32I(u64 insn) {
80 union {
81 u64 raw;
82        BitField<0, 8, IR::Reg> src_c; // FFMA32I reuses the destination register as the addend
83 BitField<52, 1, u64> cc;
84 BitField<53, 2, FmzMode> fmz_mode;
85 BitField<55, 1, u64> sat;
86 BitField<56, 1, u64> neg_a;
87 BitField<57, 1, u64> neg_c;
88 } const ffma32i{insn};
89
90 FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
91 ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
92}
93
94} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<44, 1, u64> ftz;
19 BitField<45, 1, u64> negate_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> negate_a;
23 BitField<49, 1, u64> abs_b;
24 } const fmnmx{insn};
25
26 if (fmnmx.cc) {
27 throw NotImplementedException("FMNMX CC");
28 }
29
30 const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
31 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
32 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
33
34 const IR::FpControl control{
35 .no_contraction = false,
36 .rounding = IR::FpRounding::DontCare,
37 .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
38 };
39 IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
40 IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
41
42 if (fmnmx.neg_pred != 0) {
43 std::swap(min, max);
44 }
45
46 v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
47}
48} // Anonymous namespace
49
50void TranslatorVisitor::FMNMX_reg(u64 insn) {
51 FMNMX(*this, insn, GetFloatReg20(insn));
52}
53
54void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
55 FMNMX(*this, insn, GetFloatCbuf(insn));
56}
57
58void TranslatorVisitor::FMNMX_imm(u64 insn) {
59 FMNMX(*this, insn, GetFloatImm20(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class Operation : u64 {
14 Cos = 0,
15 Sin = 1,
16 Ex2 = 2, // Base 2 exponent
17 Lg2 = 3, // Base 2 logarithm
18 Rcp = 4, // Reciprocal
19 Rsq = 5, // Reciprocal square root
20 Rcp64H = 6, // 64-bit reciprocal
21 Rsq64H = 7, // 64-bit reciprocal square root
22 Sqrt = 8,
23};
24} // Anonymous namespace
25
26void TranslatorVisitor::MUFU(u64 insn) {
27    // MUFU implements the hardware special-function operations listed in Operation.
28 union {
29 u64 raw;
30 BitField<0, 8, IR::Reg> dest_reg;
31 BitField<8, 8, IR::Reg> src_reg;
32 BitField<20, 4, Operation> operation;
33 BitField<46, 1, u64> abs;
34 BitField<48, 1, u64> neg;
35 BitField<50, 1, u64> sat;
36 } const mufu{insn};
37
38 const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
39 IR::F32 value{[&]() -> IR::F32 {
40 switch (mufu.operation) {
41 case Operation::Cos:
42 return ir.FPCos(op_a);
43 case Operation::Sin:
44 return ir.FPSin(op_a);
45 case Operation::Ex2:
46 return ir.FPExp2(op_a);
47 case Operation::Lg2:
48 return ir.FPLog2(op_a);
49 case Operation::Rcp:
50 return ir.FPRecip(op_a);
51 case Operation::Rsq:
52 return ir.FPRecipSqrt(op_a);
53 case Operation::Rcp64H:
54 throw NotImplementedException("MUFU.RCP64H");
55 case Operation::Rsq64H:
56 throw NotImplementedException("MUFU.RSQ64H");
57 case Operation::Sqrt:
58 return ir.FPSqrt(op_a);
59 default:
60 throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
61 }
62 }()};
63
64 if (mufu.sat) {
65 value = ir.FPSaturate(value);
66 }
67
68 F(mufu.dest_reg, value);
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/modifiers.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Scale : u64 {
15 None,
16 D2,
17 D4,
18 D8,
19 M8,
20 M4,
21 M2,
22 INVALIDSCALE37,
23};
24
25float ScaleFactor(Scale scale) {
26 switch (scale) {
27 case Scale::None:
28 return 1.0f;
29 case Scale::D2:
30 return 1.0f / 2.0f;
31 case Scale::D4:
32 return 1.0f / 4.0f;
33 case Scale::D8:
34 return 1.0f / 8.0f;
35 case Scale::M8:
36 return 8.0f;
37 case Scale::M4:
38 return 4.0f;
39 case Scale::M2:
40 return 2.0f;
41 case Scale::INVALIDSCALE37:
42 break;
43 }
44 throw NotImplementedException("Invalid FMUL scale {}", scale);
45}
46
47void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
48 FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
49 union {
50 u64 raw;
51 BitField<0, 8, IR::Reg> dest_reg;
52 BitField<8, 8, IR::Reg> src_a;
53 } const fmul{insn};
54
55 if (cc) {
56 throw NotImplementedException("FMUL CC");
57 }
58 IR::F32 op_a{v.F(fmul.src_a)};
59 if (scale != Scale::None) {
60 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
61 throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
62 }
63 op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
64 }
65 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
66 const IR::FpControl fp_control{
67 .no_contraction = true,
68 .rounding = CastFpRounding(fp_rounding),
69 .fmz_mode = CastFmzMode(fmz_mode),
70 };
71 IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
72 if (fmz_mode == FmzMode::FMZ && !sat) {
73        // Skip the FMZ special case when SAT is enabled; saturation already handles it.
74        // In D3D9 (FMZ) mode, anything * 0 is zero, even NaN and infinity
75 const IR::F32 zero{v.ir.Imm32(0.0f)};
76 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
77 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
78 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
79 value = IR::F32{v.ir.Select(any_zero, zero, value)};
80 }
81 if (sat) {
82 value = v.ir.FPSaturate(value);
83 }
84 v.F(fmul.dest_reg, value);
85}
86
87void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
88 union {
89 u64 raw;
90 BitField<39, 2, FpRounding> fp_rounding;
91 BitField<41, 3, Scale> scale;
92 BitField<44, 2, FmzMode> fmz;
93 BitField<47, 1, u64> cc;
94 BitField<48, 1, u64> neg_b;
95 BitField<50, 1, u64> sat;
96 } const fmul{insn};
97
98 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
99 fmul.neg_b != 0);
100}
101} // Anonymous namespace
102
103void TranslatorVisitor::FMUL_reg(u64 insn) {
104 return FMUL(*this, insn, GetFloatReg20(insn));
105}
106
107void TranslatorVisitor::FMUL_cbuf(u64 insn) {
108 return FMUL(*this, insn, GetFloatCbuf(insn));
109}
110
111void TranslatorVisitor::FMUL_imm(u64 insn) {
112 return FMUL(*this, insn, GetFloatImm20(insn));
113}
114
115void TranslatorVisitor::FMUL32I(u64 insn) {
116 union {
117 u64 raw;
118 BitField<52, 1, u64> cc;
119 BitField<53, 2, FmzMode> fmz;
120 BitField<55, 1, u64> sat;
121 } const fmul32i{insn};
122
123 FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
124 fmul32i.sat != 0, fmul32i.cc != 0, false);
125}
126
127} // namespace Shader::Maxwell
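
Both FMUL above and FFMA earlier apply the same FMZ special case when saturation is off: a zero factor forces the product to zero (FFMA keeps the addend instead), regardless of the other operand. The multiply rule as a standalone C++ restatement:

    // D3D9-style FMZ multiply: a zero factor yields zero even when the other
    // factor is NaN or infinity.
    float FmzMultiply(float a, float b) {
        if (a == 0.0f || b == 0.0f) {
            return 0.0f;
        }
        return a * b;
    }
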
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 SINCOS,
13 EX2,
14};
15
16void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
17 union {
18 u64 raw;
19 BitField<0, 8, IR::Reg> dest_reg;
20 BitField<39, 1, Mode> mode;
21 BitField<45, 1, u64> neg;
22 BitField<49, 1, u64> abs;
23 } const rro{insn};
24
25 v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
26}
27} // Anonymous namespace
28
29void TranslatorVisitor::RRO_reg(u64 insn) {
30 RRO(*this, insn, GetFloatReg20(insn));
31}
32
33void TranslatorVisitor::RRO_cbuf(u64 insn) {
34 RRO(*this, insn, GetFloatCbuf(insn));
35}
36
37void TranslatorVisitor::RRO_imm(u64) {
38 throw NotImplementedException("RRO (imm)");
39}
40
41} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<47, 1, u64> ftz;
26 BitField<48, 4, FPCompareOp> compare_op;
27 } const fsetp{insn};
28
29 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
30 const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
31 const IR::FpControl control{
32 .no_contraction = false,
33 .rounding = IR::FpRounding::DontCare,
34 .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
35 };
36
37 const BooleanOp bop{fsetp.bop};
38 const FPCompareOp compare_op{fsetp.compare_op};
39 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
40 const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
41 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
42 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
43 v.ir.SetPred(fsetp.dest_pred_a, result_a);
44 v.ir.SetPred(fsetp.dest_pred_b, result_b);
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::FSETP_reg(u64 insn) {
49 FSETP(*this, insn, GetFloatReg20(insn));
50}
51
52void TranslatorVisitor::FSETP_cbuf(u64 insn) {
53 FSETP(*this, insn, GetFloatCbuf(insn));
54}
55
56void TranslatorVisitor::FSETP_imm(u64 insn) {
57 FSETP(*this, insn, GetFloatImm20(insn));
58}
59
60} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::FSWZADD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<28, 8, u64> swizzle;
16 BitField<38, 1, u64> ndv;
17 BitField<39, 2, FpRounding> round;
18 BitField<44, 1, u64> ftz;
19 BitField<47, 1, u64> cc;
20 } const fswzadd{insn};
21
22 if (fswzadd.ndv != 0) {
23 throw NotImplementedException("FSWZADD NDV");
24 }
25
26 const IR::F32 src_a{GetFloatReg8(insn)};
27 const IR::F32 src_b{GetFloatReg20(insn)};
28 const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
29
30 const IR::FpControl fp_control{
31 .no_contraction = false,
32 .rounding = CastFpRounding(fswzadd.round),
33 .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
34 };
35
36 const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
37 F(fswzadd.dest_reg, result);
38
39 if (fswzadd.cc != 0) {
40 throw NotImplementedException("FSWZADD CC");
41 }
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
11 union {
12 u64 raw;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a;
15 } const hadd2{insn};
16
17 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
18 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
19 const bool promotion{lhs_a.Type() != lhs_b.Type()};
20 if (promotion) {
21 if (lhs_a.Type() == IR::Type::F16) {
22 lhs_a = v.ir.FPConvert(32, lhs_a);
23 rhs_a = v.ir.FPConvert(32, rhs_a);
24 }
25 if (lhs_b.Type() == IR::Type::F16) {
26 lhs_b = v.ir.FPConvert(32, lhs_b);
27 rhs_b = v.ir.FPConvert(32, rhs_b);
28 }
29 }
30 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
31 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
32
33 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
34 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
35
36 const IR::FpControl fp_control{
37 .no_contraction = true,
38 .rounding = IR::FpRounding::DontCare,
39 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
40 };
41 IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
42 IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
43 if (sat) {
44 lhs = v.ir.FPSaturate(lhs);
45 rhs = v.ir.FPSaturate(rhs);
46 }
47 if (promotion) {
48 lhs = v.ir.FPConvert(16, lhs);
49 rhs = v.ir.FPConvert(16, rhs);
50 }
51 v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
52}
53
54void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
55 const IR::U32& src_b) {
56 union {
57 u64 raw;
58 BitField<49, 2, Merge> merge;
59 BitField<39, 1, u64> ftz;
60 BitField<43, 1, u64> neg_a;
61 BitField<44, 1, u64> abs_a;
62 BitField<47, 2, Swizzle> swizzle_a;
63 } const hadd2{insn};
64
65 HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
66 hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
67}
68} // Anonymous namespace
69
70void TranslatorVisitor::HADD2_reg(u64 insn) {
71 union {
72 u64 raw;
73 BitField<32, 1, u64> sat;
74 BitField<31, 1, u64> neg_b;
75 BitField<30, 1, u64> abs_b;
76 BitField<28, 2, Swizzle> swizzle_b;
77 } const hadd2{insn};
78
79 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
80 GetReg20(insn));
81}
82
83void TranslatorVisitor::HADD2_cbuf(u64 insn) {
84 union {
85 u64 raw;
86 BitField<52, 1, u64> sat;
87 BitField<56, 1, u64> neg_b;
88 BitField<54, 1, u64> abs_b;
89 } const hadd2{insn};
90
91 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
92 GetCbuf(insn));
93}
94
95void TranslatorVisitor::HADD2_imm(u64 insn) {
96 union {
97 u64 raw;
98 BitField<52, 1, u64> sat;
99 BitField<56, 1, u64> neg_high;
100 BitField<30, 9, u64> high;
101 BitField<29, 1, u64> neg_low;
102 BitField<20, 9, u64> low;
103 } const hadd2{insn};
104
105 const u32 imm{
106 static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
107 static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
108 HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
109}
110
111void TranslatorVisitor::HADD2_32I(u64 insn) {
112 union {
113 u64 raw;
114 BitField<55, 1, u64> ftz;
115 BitField<52, 1, u64> sat;
116 BitField<56, 1, u64> neg_a;
117 BitField<53, 2, Swizzle> swizzle_a;
118 BitField<20, 32, u64> imm32;
119 } const hadd2{insn};
120
121 const u32 imm{static_cast<u32>(hadd2.imm32)};
122 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
123 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
124}
125} // namespace Shader::Maxwell
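
The HADD2_imm path above rebuilds two packed fp16 operands from a pair of 9-bit immediates: each field carries the exponent and top four mantissa bits of one half, shifting left by 6 restores the 16-bit pattern, and the separate neg bits supply the signs. A minimal host-side sketch of that packing, assuming this reading of the encoding (PackHalf2x16Imm is a hypothetical helper, not part of the translator):

#include <cstdint>

// Mirrors the immediate construction in HADD2_imm: (low << 6) | (neg_low << 15)
// forms the lower half, and the same pattern shifted into the upper 16 bits
// forms the higher half. The 9-bit fields hold exponent plus top mantissa bits.
static std::uint32_t PackHalf2x16Imm(std::uint32_t low, bool neg_low,
                                     std::uint32_t high, bool neg_high) {
    const std::uint32_t lo16 = (low << 6) | (neg_low ? 1u << 15 : 0u);
    const std::uint32_t hi16 = (high << 6) | (neg_high ? 1u << 15 : 0u);
    return lo16 | (hi16 << 16);
}

// Example: the 9-bit field 0x0f0 expands to 0x3c00 (fp16 1.0), so
// PackHalf2x16Imm(0x0f0, false, 0x0f0, true) packs +1.0 in the low half and
// -1.0 in the high half.
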
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
10 Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
11 bool sat, HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hfma2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
21 const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
22 if (promotion) {
23 if (lhs_a.Type() == IR::Type::F16) {
24 lhs_a = v.ir.FPConvert(32, lhs_a);
25 rhs_a = v.ir.FPConvert(32, rhs_a);
26 }
27 if (lhs_b.Type() == IR::Type::F16) {
28 lhs_b = v.ir.FPConvert(32, lhs_b);
29 rhs_b = v.ir.FPConvert(32, rhs_b);
30 }
31 if (lhs_c.Type() == IR::Type::F16) {
32 lhs_c = v.ir.FPConvert(32, lhs_c);
33 rhs_c = v.ir.FPConvert(32, rhs_c);
34 }
35 }
36
37 lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
38 rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
39
40 lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
41 rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
42
43 const IR::FpControl fp_control{
44 .no_contraction = true,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = HalfPrecision2FmzMode(precision),
47 };
48 IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
49 IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
50 if (precision == HalfPrecision::FMZ && !sat) {
51 // Do not implement FMZ when SAT is enabled, as saturation already handles it.
52 // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity.
53 const IR::F32 zero{v.ir.Imm32(0.0f)};
54 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
55 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
56 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
57 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
58
59 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
60 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
61 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
62 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
63 }
64 if (sat) {
65 lhs = v.ir.FPSaturate(lhs);
66 rhs = v.ir.FPSaturate(rhs);
67 }
68 if (promotion) {
69 lhs = v.ir.FPConvert(16, lhs);
70 rhs = v.ir.FPConvert(16, rhs);
71 }
72 v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
73}
74
75void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
76 Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
77 HalfPrecision precision) {
78 union {
79 u64 raw;
80 BitField<47, 2, Swizzle> swizzle_a;
81 BitField<49, 2, Merge> merge;
82 } const hfma2{insn};
83
84 HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
85 sat, precision);
86}
87} // Anonymous namespace
88
89void TranslatorVisitor::HFMA2_reg(u64 insn) {
90 union {
91 u64 raw;
92 BitField<28, 2, Swizzle> swizzle_b;
93 BitField<32, 1, u64> saturate;
94 BitField<31, 1, u64> neg_b;
95 BitField<30, 1, u64> neg_c;
96 BitField<35, 2, Swizzle> swizzle_c;
97 BitField<37, 2, HalfPrecision> precision;
98 } const hfma2{insn};
99
100 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
101 GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
102}
103
104void TranslatorVisitor::HFMA2_rc(u64 insn) {
105 union {
106 u64 raw;
107 BitField<51, 1, u64> neg_c;
108 BitField<52, 1, u64> saturate;
109 BitField<53, 2, Swizzle> swizzle_b;
110 BitField<56, 1, u64> neg_b;
111 BitField<57, 2, HalfPrecision> precision;
112 } const hfma2{insn};
113
114 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
115 GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
116}
117
118void TranslatorVisitor::HFMA2_cr(u64 insn) {
119 union {
120 u64 raw;
121 BitField<51, 1, u64> neg_c;
122 BitField<52, 1, u64> saturate;
123 BitField<53, 2, Swizzle> swizzle_c;
124 BitField<56, 1, u64> neg_b;
125 BitField<57, 2, HalfPrecision> precision;
126 } const hfma2{insn};
127
128 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
129 GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
130}
131
132void TranslatorVisitor::HFMA2_imm(u64 insn) {
133 union {
134 u64 raw;
135 BitField<51, 1, u64> neg_c;
136 BitField<52, 1, u64> saturate;
137 BitField<53, 2, Swizzle> swizzle_c;
138
139 BitField<56, 1, u64> neg_high;
140 BitField<30, 9, u64> high;
141 BitField<29, 1, u64> neg_low;
142 BitField<20, 9, u64> low;
143 BitField<57, 2, HalfPrecision> precision;
144 } const hfma2{insn};
145
146 const u32 imm{
147 static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
148 static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
149
150 HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
151 GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
152}
153
154void TranslatorVisitor::HFMA2_32I(u64 insn) {
155 union {
156 u64 raw;
157 BitField<0, 8, IR::Reg> src_c;
158 BitField<20, 32, u64> imm32;
159 BitField<52, 1, u64> neg_c;
160 BitField<53, 2, Swizzle> swizzle_a;
161 BitField<55, 2, HalfPrecision> precision;
162 } const hfma2{insn};
163
164 const u32 imm{static_cast<u32>(hfma2.imm32)};
165 HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
166 Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
167}
168
169} // namespace Shader::Maxwell
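
The FMZ branch in HFMA2 above models D3D9 multiplication rules: when either multiplicand compares equal to zero, the product is treated as exactly zero even if the other operand is NaN or infinity, so the fused result collapses to the addend. A scalar sketch of that behaviour, assuming this reading of the select (FmzFma is an illustrative name, not the emitter's actual lowering):

#include <cmath>

// Scalar model of the FMZ (D3D9-style) select: a zero multiplicand forces the
// product to zero, so a * b + c degenerates to c, even for NaN or infinity.
static float FmzFma(float a, float b, float c) {
    if (a == 0.0f || b == 0.0f) {
        return c;
    }
    return std::fma(a, b, c);
}

// FmzFma(0.0f, INFINITY, 2.0f) returns 2.0f, whereas std::fma(0.0f, INFINITY,
// 2.0f) would propagate NaN.
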
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8
9IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
10 switch (precision) {
11 case HalfPrecision::None:
12 return IR::FmzMode::None;
13 case HalfPrecision::FTZ:
14 return IR::FmzMode::FTZ;
15 case HalfPrecision::FMZ:
16 return IR::FmzMode::FMZ;
17 default:
18 return IR::FmzMode::DontCare;
19 }
20}
21
22std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
23 switch (swizzle) {
24 case Swizzle::H1_H0: {
25 const IR::Value vector{ir.UnpackFloat2x16(value)};
26 return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
27 }
28 case Swizzle::H0_H0: {
29 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
30 return {scalar, scalar};
31 }
32 case Swizzle::H1_H1: {
33 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
34 return {scalar, scalar};
35 }
36 case Swizzle::F32: {
37 const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
38 return {scalar, scalar};
39 }
40 }
41 throw InvalidArgument("Invalid swizzle {}", swizzle);
42}
43
44IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
45 Merge merge) {
46 switch (merge) {
47 case Merge::H1_H0:
48 return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
49 case Merge::F32:
50 return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
51 case Merge::MRG_H0:
52 case Merge::MRG_H1: {
53 const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
54 const bool is_h0{merge == Merge::MRG_H0};
55 const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
56 return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
57 }
58 }
59 throw InvalidArgument("Invalid merge {}", merge);
60}
61
62} // namespace Shader::Maxwell
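
Extract above selects the two operands of a packed-half instruction from one 32-bit register: H1_H0 splits it into its low and high fp16 halves, H0_H0 and H1_H1 duplicate one half, and F32 reinterprets the whole register as a single fp32. A host-side sketch of the same selection on raw bit patterns, assuming UnpackFloat2x16 places the low 16 bits in element 0 as GLSL's unpackHalf2x16 does (ExtractBits and SwizzleSel are illustrative names):

#include <cstdint>
#include <utility>

enum class SwizzleSel { H1_H0, F32, H0_H0, H1_H1 };

// Returns the {lhs, rhs} operand bit patterns chosen by the swizzle. For the
// half swizzles these are 16-bit fp16 patterns; for F32 both operands are the
// whole 32-bit register interpreted as one fp32.
static std::pair<std::uint32_t, std::uint32_t> ExtractBits(std::uint32_t reg,
                                                           SwizzleSel swizzle) {
    const std::uint32_t h0 = reg & 0xffffu; // low half
    const std::uint32_t h1 = reg >> 16;     // high half
    switch (swizzle) {
    case SwizzleSel::H1_H0:
        return {h0, h1};
    case SwizzleSel::H0_H0:
        return {h0, h0};
    case SwizzleSel::H1_H1:
        return {h1, h1};
    case SwizzleSel::F32:
        return {reg, reg};
    }
    return {0u, 0u};
}
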
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14
15enum class Merge : u64 {
16 H1_H0,
17 F32,
18 MRG_H0,
19 MRG_H1,
20};
21
22enum class Swizzle : u64 {
23 H1_H0,
24 F32,
25 H0_H0,
26 H1_H1,
27};
28
29enum class HalfPrecision : u64 {
30 None = 0,
31 FTZ = 1,
32 FMZ = 2,
33};
34
35IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
36
37std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
38
39IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
40 Merge merge);
41
42} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
11 HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hmul2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 const bool promotion{lhs_a.Type() != lhs_b.Type()};
21 if (promotion) {
22 if (lhs_a.Type() == IR::Type::F16) {
23 lhs_a = v.ir.FPConvert(32, lhs_a);
24 rhs_a = v.ir.FPConvert(32, rhs_a);
25 }
26 if (lhs_b.Type() == IR::Type::F16) {
27 lhs_b = v.ir.FPConvert(32, lhs_b);
28 rhs_b = v.ir.FPConvert(32, rhs_b);
29 }
30 }
31 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
32 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
33
34 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
35 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
36
37 const IR::FpControl fp_control{
38 .no_contraction = true,
39 .rounding = IR::FpRounding::DontCare,
40 .fmz_mode = HalfPrecision2FmzMode(precision),
41 };
42 IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
43 IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
44 if (precision == HalfPrecision::FMZ && !sat) {
45 // Do not implement FMZ when SAT is enabled, as saturation already handles it.
46 // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity.
47 const IR::F32 zero{v.ir.Imm32(0.0f)};
48 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
49 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
50 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
51 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
52
53 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
54 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
55 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
56 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
57 }
58 if (sat) {
59 lhs = v.ir.FPSaturate(lhs);
60 rhs = v.ir.FPSaturate(rhs);
61 }
62 if (promotion) {
63 lhs = v.ir.FPConvert(16, lhs);
64 rhs = v.ir.FPConvert(16, rhs);
65 }
66 v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
67}
68
69void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
70 Swizzle swizzle_b, const IR::U32& src_b) {
71 union {
72 u64 raw;
73 BitField<49, 2, Merge> merge;
74 BitField<47, 2, Swizzle> swizzle_a;
75 BitField<39, 2, HalfPrecision> precision;
76 } const hmul2{insn};
77
78 HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
79 hmul2.precision);
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::HMUL2_reg(u64 insn) {
84 union {
85 u64 raw;
86 BitField<32, 1, u64> sat;
87 BitField<31, 1, u64> neg_b;
88 BitField<30, 1, u64> abs_b;
89 BitField<44, 1, u64> abs_a;
90 BitField<28, 2, Swizzle> swizzle_b;
91 } const hmul2{insn};
92
93 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
94 hmul2.swizzle_b, GetReg20(insn));
95}
96
97void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
98 union {
99 u64 raw;
100 BitField<52, 1, u64> sat;
101 BitField<54, 1, u64> abs_b;
102 BitField<43, 1, u64> neg_a;
103 BitField<44, 1, u64> abs_a;
104 } const hmul2{insn};
105
106 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
107 Swizzle::F32, GetCbuf(insn));
108}
109
110void TranslatorVisitor::HMUL2_imm(u64 insn) {
111 union {
112 u64 raw;
113 BitField<52, 1, u64> sat;
114 BitField<56, 1, u64> neg_high;
115 BitField<30, 9, u64> high;
116 BitField<29, 1, u64> neg_low;
117 BitField<20, 9, u64> low;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 } const hmul2{insn};
121
122 const u32 imm{
123 static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
124 static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
125 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
126 Swizzle::H1_H0, ir.Imm32(imm));
127}
128
129void TranslatorVisitor::HMUL2_32I(u64 insn) {
130 union {
131 u64 raw;
132 BitField<55, 2, HalfPrecision> precision;
133 BitField<52, 1, u64> sat;
134 BitField<53, 2, Swizzle> swizzle_a;
135 BitField<20, 32, u64> imm32;
136 } const hmul2{insn};
137
138 const u32 imm{static_cast<u32>(hmul2.imm32)};
139 HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
140 Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
141}
142
143} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
10 bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
11 union {
12 u64 insn;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a_reg;
15 BitField<39, 3, IR::Pred> pred;
16 BitField<42, 1, u64> neg_pred;
17 BitField<43, 1, u64> neg_a;
18 BitField<45, 2, BooleanOp> bop;
19 BitField<44, 1, u64> abs_a;
20 BitField<47, 2, Swizzle> swizzle_a;
21 } const hset2{insn};
22
23 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
24 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
25
26 if (lhs_a.Type() != lhs_b.Type()) {
27 if (lhs_a.Type() == IR::Type::F16) {
28 lhs_a = v.ir.FPConvert(32, lhs_a);
29 rhs_a = v.ir.FPConvert(32, rhs_a);
30 }
31 if (lhs_b.Type() == IR::Type::F16) {
32 lhs_b = v.ir.FPConvert(32, lhs_b);
33 rhs_b = v.ir.FPConvert(32, rhs_b);
34 }
35 }
36
37 lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
38 rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
39
40 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
41 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
42
43 const IR::FpControl control{
44 .no_contraction = false,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
47 };
48
49 IR::U1 pred{v.ir.GetPred(hset2.pred)};
50 if (hset2.neg_pred != 0) {
51 pred = v.ir.LogicalNot(pred);
52 }
53 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
54 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
55 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
56 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
57
58 const u32 true_value = bf ? 0x3c00 : 0xffff;
59 const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
60 const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
61 const IR::U32 fail_result{v.ir.Imm32(0)};
62 const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
63 const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
64
65 v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
66}
67} // Anonymous namespace
68
69void TranslatorVisitor::HSET2_reg(u64 insn) {
70 union {
71 u64 insn;
72 BitField<30, 1, u64> abs_b;
73 BitField<49, 1, u64> bf;
74 BitField<31, 1, u64> neg_b;
75 BitField<50, 1, u64> ftz;
76 BitField<35, 4, FPCompareOp> compare_op;
77 BitField<28, 2, Swizzle> swizzle_b;
78 } const hset2{insn};
79
80 HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
81 hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
82}
83
84void TranslatorVisitor::HSET2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> bf;
88 BitField<56, 1, u64> neg_b;
89 BitField<54, 1, u64> ftz;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hset2{insn};
92
93 HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
94 hset2.compare_op, Swizzle::F32);
95}
96
97void TranslatorVisitor::HSET2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> bf;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hset2{insn};
108
109 const u32 imm{
110 static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
112
113 HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
114 Swizzle::H1_H0);
115}
116
117} // namespace Shader::Maxwell
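
HSET2 above packs its two per-half comparison results into one 32-bit register: each half is zero on failure, and on success it is either the fp16 constant 1.0 (0x3c00) when BF is set or an all-ones 16-bit mask otherwise, with the first comparison landing in the low half. A minimal sketch of that packing (PackHset2Result is an illustrative name):

#include <cstdint>

// Illustrative model of the HSET2 result register: low half for the first
// comparison, high half for the second; BF selects fp16 1.0 over a bit mask.
static std::uint32_t PackHset2Result(bool lhs_pass, bool rhs_pass, bool bf) {
    const std::uint32_t true_value = bf ? 0x3c00u : 0xffffu;
    const std::uint32_t lo = lhs_pass ? true_value : 0u;
    const std::uint32_t hi = rhs_pass ? (true_value << 16) : 0u;
    return lo | hi;
}
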
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
10 Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
11 union {
12 u64 insn;
13 BitField<8, 8, IR::Reg> src_a_reg;
14 BitField<3, 3, IR::Pred> dest_pred_a;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 1, u64> neg_a;
19 BitField<45, 2, BooleanOp> bop;
20 BitField<44, 1, u64> abs_a;
21 BitField<6, 1, u64> ftz;
22 BitField<47, 2, Swizzle> swizzle_a;
23 } const hsetp2{insn};
24
25 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
26 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
27
28 if (lhs_a.Type() != lhs_b.Type()) {
29 if (lhs_a.Type() == IR::Type::F16) {
30 lhs_a = v.ir.FPConvert(32, lhs_a);
31 rhs_a = v.ir.FPConvert(32, rhs_a);
32 }
33 if (lhs_b.Type() == IR::Type::F16) {
34 lhs_b = v.ir.FPConvert(32, lhs_b);
35 rhs_b = v.ir.FPConvert(32, rhs_b);
36 }
37 }
38
39 lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
40 rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
41
42 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
43 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
44
45 const IR::FpControl control{
46 .no_contraction = false,
47 .rounding = IR::FpRounding::DontCare,
48 .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
49 };
50
51 IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
52 if (hsetp2.neg_pred != 0) {
53 pred = v.ir.LogicalNot(pred);
54 }
55 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
56 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
57 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
58 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
59
60 if (h_and) {
61 auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
62 v.ir.SetPred(hsetp2.dest_pred_a, result);
63 v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
64 } else {
65 v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
66 v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
67 }
68}
69} // Anonymous namespace
70
71void TranslatorVisitor::HSETP2_reg(u64 insn) {
72 union {
73 u64 insn;
74 BitField<30, 1, u64> abs_b;
75 BitField<49, 1, u64> h_and;
76 BitField<31, 1, u64> neg_b;
77 BitField<35, 4, FPCompareOp> compare_op;
78 BitField<28, 2, Swizzle> swizzle_b;
79 } const hsetp2{insn};
80 HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
81 hsetp2.compare_op, hsetp2.h_and != 0);
82}
83
84void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> h_and;
88 BitField<54, 1, u64> abs_b;
89 BitField<56, 1, u64> neg_b;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hsetp2{insn};
92
93 HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
94 hsetp2.compare_op, hsetp2.h_and != 0);
95}
96
97void TranslatorVisitor::HSETP2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> h_and;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hsetp2{insn};
108
109 const u32 imm{static_cast<u32>(hsetp2.low << 6) |
110 static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hsetp2.high << 22) |
112 static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
113
114 HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
115 hsetp2.h_and != 0);
116}
117
118} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
12 u32 offset) {
13 if (unaligned) {
14 return ir.Imm32(0);
15 }
16 return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
17}
18} // Anonymous namespace
19
20IR::U32 TranslatorVisitor::X(IR::Reg reg) {
21 return ir.GetReg(reg);
22}
23
24IR::U64 TranslatorVisitor::L(IR::Reg reg) {
25 if (!IR::IsAligned(reg, 2)) {
26 throw NotImplementedException("Unaligned source register {}", reg);
27 }
28 return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
29}
30
31IR::F32 TranslatorVisitor::F(IR::Reg reg) {
32 return ir.BitCast<IR::F32>(X(reg));
33}
34
35IR::F64 TranslatorVisitor::D(IR::Reg reg) {
36 if (!IR::IsAligned(reg, 2)) {
37 throw NotImplementedException("Unaligned source register {}", reg);
38 }
39 return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
40}
41
42void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
43 ir.SetReg(dest_reg, value);
44}
45
46void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
47 if (!IR::IsAligned(dest_reg, 2)) {
48 throw NotImplementedException("Unaligned destination register {}", dest_reg);
49 }
50 const IR::Value result{ir.UnpackUint2x32(value)};
51 for (int i = 0; i < 2; i++) {
52 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
53 }
54}
55
56void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
57 X(dest_reg, ir.BitCast<IR::U32>(value));
58}
59
60void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
61 if (!IR::IsAligned(dest_reg, 2)) {
62 throw NotImplementedException("Unaligned destination register {}", dest_reg);
63 }
64 const IR::Value result{ir.UnpackDouble2x32(value)};
65 for (int i = 0; i < 2; i++) {
66 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
67 }
68}
69
70IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
71 union {
72 u64 raw;
73 BitField<8, 8, IR::Reg> index;
74 } const reg{insn};
75 return X(reg.index);
76}
77
78IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
79 union {
80 u64 raw;
81 BitField<20, 8, IR::Reg> index;
82 } const reg{insn};
83 return X(reg.index);
84}
85
86IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
87 union {
88 u64 raw;
89 BitField<39, 8, IR::Reg> index;
90 } const reg{insn};
91 return X(reg.index);
92}
93
94IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
95 return ir.BitCast<IR::F32>(GetReg8(insn));
96}
97
98IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
99 return ir.BitCast<IR::F32>(GetReg20(insn));
100}
101
102IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
103 return ir.BitCast<IR::F32>(GetReg39(insn));
104}
105
106IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
107 union {
108 u64 raw;
109 BitField<20, 8, IR::Reg> index;
110 } const reg{insn};
111 return D(reg.index);
112}
113
114IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
115 union {
116 u64 raw;
117 BitField<39, 8, IR::Reg> index;
118 } const reg{insn};
119 return D(reg.index);
120}
121
122static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
123 union {
124 u64 raw;
125 BitField<20, 14, u64> offset;
126 BitField<34, 5, u64> binding;
127 } const cbuf{insn};
128
129 if (cbuf.binding >= 18) {
130 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
131 }
132 if (cbuf.offset >= 0x10'000) {
133 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
134 }
135 const IR::Value binding{static_cast<u32>(cbuf.binding)};
136 const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
137 return {IR::U32{binding}, IR::U32{byte_offset}};
138}
139
140IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
141 const auto [binding, byte_offset]{CbufAddr(insn)};
142 return ir.GetCbuf(binding, byte_offset);
143}
144
145IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
146 const auto [binding, byte_offset]{CbufAddr(insn)};
147 return ir.GetFloatCbuf(binding, byte_offset);
148}
149
150IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
151 union {
152 u64 raw;
153 BitField<20, 1, u64> unaligned;
154 } const cbuf{insn};
155
156 const auto [binding, offset_value]{CbufAddr(insn)};
157 const bool unaligned{cbuf.unaligned != 0};
158 const u32 offset{offset_value.U32()};
159 const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
160
161 const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
162 const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
163 return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
164}
165
166IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
167 union {
168 u64 raw;
169 BitField<20, 1, u64> unaligned;
170 } const cbuf{insn};
171
172 if (cbuf.unaligned != 0) {
173 throw NotImplementedException("Unaligned packed constant buffer read");
174 }
175 const auto [binding, lower_offset]{CbufAddr(insn)};
176 const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
177 const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
178 const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
179 return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
180}
181
182IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
183 union {
184 u64 raw;
185 BitField<20, 19, u64> value;
186 BitField<56, 1, u64> is_negative;
187 } const imm{insn};
188
189 if (imm.is_negative != 0) {
190 const s64 raw{static_cast<s64>(imm.value)};
191 return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
192 } else {
193 return ir.Imm32(static_cast<u32>(imm.value));
194 }
195}
196
197IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
198 union {
199 u64 raw;
200 BitField<20, 19, u64> value;
201 BitField<56, 1, u64> is_negative;
202 } const imm{insn};
203 const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
204 const u32 value{static_cast<u32>(imm.value) << 12};
205 return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
206}
207
208IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
209 union {
210 u64 raw;
211 BitField<20, 19, u64> value;
212 BitField<56, 1, u64> is_negative;
213 } const imm{insn};
214 const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
215 const u64 value{imm.value << 44};
216 return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
217}
218
219IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
220 const s64 value{GetImm20(insn).U32()};
221 return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
222}
223
224IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
225 union {
226 u64 raw;
227 BitField<20, 32, u64> value;
228 } const imm{insn};
229 return ir.Imm32(static_cast<u32>(imm.value));
230}
231
232IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
233 union {
234 u64 raw;
235 BitField<20, 32, u64> value;
236 } const imm{insn};
237 return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
238}
239
240void TranslatorVisitor::SetZFlag(const IR::U1& value) {
241 ir.SetZFlag(value);
242}
243
244void TranslatorVisitor::SetSFlag(const IR::U1& value) {
245 ir.SetSFlag(value);
246}
247
248void TranslatorVisitor::SetCFlag(const IR::U1& value) {
249 ir.SetCFlag(value);
250}
251
252void TranslatorVisitor::SetOFlag(const IR::U1& value) {
253 ir.SetOFlag(value);
254}
255
256void TranslatorVisitor::ResetZero() {
257 SetZFlag(ir.Imm1(false));
258}
259
260void TranslatorVisitor::ResetSFlag() {
261 SetSFlag(ir.Imm1(false));
262}
263
264void TranslatorVisitor::ResetCFlag() {
265 SetCFlag(ir.Imm1(false));
266}
267
268void TranslatorVisitor::ResetOFlag() {
269 SetOFlag(ir.Imm1(false));
270}
271
272} // namespace Shader::Maxwell
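
GetImm20 and GetFloatImm20 above decode the shared 20-bit immediate: a 19-bit field at instruction bit 20 plus a sign bit at bit 56. The integer form sign-extends by subtracting 2^19 when the sign bit is set; the float form treats the 19 bits as the top of an fp32 (restored with a 12-bit left shift) and places the sign at bit 31. A sketch of both decodings under that reading (the Decode* names are illustrative):

#include <cstdint>
#include <cstring>

// Integer immediate: the 19-bit field is the low bits of a two's-complement
// value whose sign lives in the separate instruction bit, so a set sign bit
// subtracts 2^19.
static std::int32_t DecodeImm20(std::uint32_t value19, bool negative) {
    const std::int32_t raw = static_cast<std::int32_t>(value19);
    return negative ? raw - (1 << 19) : raw;
}

// Float immediate: the 19 bits are the exponent and upper mantissa of an fp32;
// shifting left by 12 restores the full pattern and the sign goes to bit 31.
static float DecodeFloatImm20(std::uint32_t value19, bool negative) {
    const std::uint32_t bits = (value19 << 12) | (negative ? 1u << 31 : 0u);
    float result;
    std::memcpy(&result, &bits, sizeof(result)); // bit cast
    return result;
}

// Example: fp32 1.0 is 0x3f800000, whose top 19 bits are 0x3f800, so
// DecodeFloatImm20(0x3f800, false) == 1.0f.
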
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/maxwell/instruction.h"
11
12namespace Shader::Maxwell {
13
14enum class CompareOp : u64 {
15 False,
16 LessThan,
17 Equal,
18 LessThanEqual,
19 GreaterThan,
20 NotEqual,
21 GreaterThanEqual,
22 True,
23};
24
25enum class BooleanOp : u64 {
26 AND,
27 OR,
28 XOR,
29};
30
31enum class PredicateOp : u64 {
32 False,
33 True,
34 Zero,
35 NonZero,
36};
37
38enum class FPCompareOp : u64 {
39 F,
40 LT,
41 EQ,
42 LE,
43 GT,
44 NE,
45 GE,
46 NUM,
47 Nan,
48 LTU,
49 EQU,
50 LEU,
51 GTU,
52 NEU,
53 GEU,
54 T,
55};
56
57class TranslatorVisitor {
58public:
59 explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
60
61 Environment& env;
62 IR::IREmitter ir;
63
64 void AL2P(u64 insn);
65 void ALD(u64 insn);
66 void AST(u64 insn);
67 void ATOM_cas(u64 insn);
68 void ATOM(u64 insn);
69 void ATOMS_cas(u64 insn);
70 void ATOMS(u64 insn);
71 void B2R(u64 insn);
72 void BAR(u64 insn);
73 void BFE_reg(u64 insn);
74 void BFE_cbuf(u64 insn);
75 void BFE_imm(u64 insn);
76 void BFI_reg(u64 insn);
77 void BFI_rc(u64 insn);
78 void BFI_cr(u64 insn);
79 void BFI_imm(u64 insn);
80 void BPT(u64 insn);
81 void BRA(u64 insn);
82 void BRK(u64 insn);
83 void BRX(u64 insn);
84 void CAL();
85 void CCTL(u64 insn);
86 void CCTLL(u64 insn);
87 void CONT(u64 insn);
88 void CS2R(u64 insn);
89 void CSET(u64 insn);
90 void CSETP(u64 insn);
91 void DADD_reg(u64 insn);
92 void DADD_cbuf(u64 insn);
93 void DADD_imm(u64 insn);
94 void DEPBAR();
95 void DFMA_reg(u64 insn);
96 void DFMA_rc(u64 insn);
97 void DFMA_cr(u64 insn);
98 void DFMA_imm(u64 insn);
99 void DMNMX_reg(u64 insn);
100 void DMNMX_cbuf(u64 insn);
101 void DMNMX_imm(u64 insn);
102 void DMUL_reg(u64 insn);
103 void DMUL_cbuf(u64 insn);
104 void DMUL_imm(u64 insn);
105 void DSET_reg(u64 insn);
106 void DSET_cbuf(u64 insn);
107 void DSET_imm(u64 insn);
108 void DSETP_reg(u64 insn);
109 void DSETP_cbuf(u64 insn);
110 void DSETP_imm(u64 insn);
111 void EXIT();
112 void F2F_reg(u64 insn);
113 void F2F_cbuf(u64 insn);
114 void F2F_imm(u64 insn);
115 void F2I_reg(u64 insn);
116 void F2I_cbuf(u64 insn);
117 void F2I_imm(u64 insn);
118 void FADD_reg(u64 insn);
119 void FADD_cbuf(u64 insn);
120 void FADD_imm(u64 insn);
121 void FADD32I(u64 insn);
122 void FCHK_reg(u64 insn);
123 void FCHK_cbuf(u64 insn);
124 void FCHK_imm(u64 insn);
125 void FCMP_reg(u64 insn);
126 void FCMP_rc(u64 insn);
127 void FCMP_cr(u64 insn);
128 void FCMP_imm(u64 insn);
129 void FFMA_reg(u64 insn);
130 void FFMA_rc(u64 insn);
131 void FFMA_cr(u64 insn);
132 void FFMA_imm(u64 insn);
133 void FFMA32I(u64 insn);
134 void FLO_reg(u64 insn);
135 void FLO_cbuf(u64 insn);
136 void FLO_imm(u64 insn);
137 void FMNMX_reg(u64 insn);
138 void FMNMX_cbuf(u64 insn);
139 void FMNMX_imm(u64 insn);
140 void FMUL_reg(u64 insn);
141 void FMUL_cbuf(u64 insn);
142 void FMUL_imm(u64 insn);
143 void FMUL32I(u64 insn);
144 void FSET_reg(u64 insn);
145 void FSET_cbuf(u64 insn);
146 void FSET_imm(u64 insn);
147 void FSETP_reg(u64 insn);
148 void FSETP_cbuf(u64 insn);
149 void FSETP_imm(u64 insn);
150 void FSWZADD(u64 insn);
151 void GETCRSPTR(u64 insn);
152 void GETLMEMBASE(u64 insn);
153 void HADD2_reg(u64 insn);
154 void HADD2_cbuf(u64 insn);
155 void HADD2_imm(u64 insn);
156 void HADD2_32I(u64 insn);
157 void HFMA2_reg(u64 insn);
158 void HFMA2_rc(u64 insn);
159 void HFMA2_cr(u64 insn);
160 void HFMA2_imm(u64 insn);
161 void HFMA2_32I(u64 insn);
162 void HMUL2_reg(u64 insn);
163 void HMUL2_cbuf(u64 insn);
164 void HMUL2_imm(u64 insn);
165 void HMUL2_32I(u64 insn);
166 void HSET2_reg(u64 insn);
167 void HSET2_cbuf(u64 insn);
168 void HSET2_imm(u64 insn);
169 void HSETP2_reg(u64 insn);
170 void HSETP2_cbuf(u64 insn);
171 void HSETP2_imm(u64 insn);
172 void I2F_reg(u64 insn);
173 void I2F_cbuf(u64 insn);
174 void I2F_imm(u64 insn);
175 void I2I_reg(u64 insn);
176 void I2I_cbuf(u64 insn);
177 void I2I_imm(u64 insn);
178 void IADD_reg(u64 insn);
179 void IADD_cbuf(u64 insn);
180 void IADD_imm(u64 insn);
181 void IADD3_reg(u64 insn);
182 void IADD3_cbuf(u64 insn);
183 void IADD3_imm(u64 insn);
184 void IADD32I(u64 insn);
185 void ICMP_reg(u64 insn);
186 void ICMP_rc(u64 insn);
187 void ICMP_cr(u64 insn);
188 void ICMP_imm(u64 insn);
189 void IDE(u64 insn);
190 void IDP_reg(u64 insn);
191 void IDP_imm(u64 insn);
192 void IMAD_reg(u64 insn);
193 void IMAD_rc(u64 insn);
194 void IMAD_cr(u64 insn);
195 void IMAD_imm(u64 insn);
196 void IMAD32I(u64 insn);
197 void IMADSP_reg(u64 insn);
198 void IMADSP_rc(u64 insn);
199 void IMADSP_cr(u64 insn);
200 void IMADSP_imm(u64 insn);
201 void IMNMX_reg(u64 insn);
202 void IMNMX_cbuf(u64 insn);
203 void IMNMX_imm(u64 insn);
204 void IMUL_reg(u64 insn);
205 void IMUL_cbuf(u64 insn);
206 void IMUL_imm(u64 insn);
207 void IMUL32I(u64 insn);
208 void IPA(u64 insn);
209 void ISBERD(u64 insn);
210 void ISCADD_reg(u64 insn);
211 void ISCADD_cbuf(u64 insn);
212 void ISCADD_imm(u64 insn);
213 void ISCADD32I(u64 insn);
214 void ISET_reg(u64 insn);
215 void ISET_cbuf(u64 insn);
216 void ISET_imm(u64 insn);
217 void ISETP_reg(u64 insn);
218 void ISETP_cbuf(u64 insn);
219 void ISETP_imm(u64 insn);
220 void JCAL(u64 insn);
221 void JMP(u64 insn);
222 void JMX(u64 insn);
223 void KIL();
224 void LD(u64 insn);
225 void LDC(u64 insn);
226 void LDG(u64 insn);
227 void LDL(u64 insn);
228 void LDS(u64 insn);
229 void LEA_hi_reg(u64 insn);
230 void LEA_hi_cbuf(u64 insn);
231 void LEA_lo_reg(u64 insn);
232 void LEA_lo_cbuf(u64 insn);
233 void LEA_lo_imm(u64 insn);
234 void LEPC(u64 insn);
235 void LONGJMP(u64 insn);
236 void LOP_reg(u64 insn);
237 void LOP_cbuf(u64 insn);
238 void LOP_imm(u64 insn);
239 void LOP3_reg(u64 insn);
240 void LOP3_cbuf(u64 insn);
241 void LOP3_imm(u64 insn);
242 void LOP32I(u64 insn);
243 void MEMBAR(u64 insn);
244 void MOV_reg(u64 insn);
245 void MOV_cbuf(u64 insn);
246 void MOV_imm(u64 insn);
247 void MOV32I(u64 insn);
248 void MUFU(u64 insn);
249 void NOP(u64 insn);
250 void OUT_reg(u64 insn);
251 void OUT_cbuf(u64 insn);
252 void OUT_imm(u64 insn);
253 void P2R_reg(u64 insn);
254 void P2R_cbuf(u64 insn);
255 void P2R_imm(u64 insn);
256 void PBK();
257 void PCNT();
258 void PEXIT(u64 insn);
259 void PIXLD(u64 insn);
260 void PLONGJMP(u64 insn);
261 void POPC_reg(u64 insn);
262 void POPC_cbuf(u64 insn);
263 void POPC_imm(u64 insn);
264 void PRET(u64 insn);
265 void PRMT_reg(u64 insn);
266 void PRMT_rc(u64 insn);
267 void PRMT_cr(u64 insn);
268 void PRMT_imm(u64 insn);
269 void PSET(u64 insn);
270 void PSETP(u64 insn);
271 void R2B(u64 insn);
272 void R2P_reg(u64 insn);
273 void R2P_cbuf(u64 insn);
274 void R2P_imm(u64 insn);
275 void RAM(u64 insn);
276 void RED(u64 insn);
277 void RET(u64 insn);
278 void RRO_reg(u64 insn);
279 void RRO_cbuf(u64 insn);
280 void RRO_imm(u64 insn);
281 void RTT(u64 insn);
282 void S2R(u64 insn);
283 void SAM(u64 insn);
284 void SEL_reg(u64 insn);
285 void SEL_cbuf(u64 insn);
286 void SEL_imm(u64 insn);
287 void SETCRSPTR(u64 insn);
288 void SETLMEMBASE(u64 insn);
289 void SHF_l_reg(u64 insn);
290 void SHF_l_imm(u64 insn);
291 void SHF_r_reg(u64 insn);
292 void SHF_r_imm(u64 insn);
293 void SHFL(u64 insn);
294 void SHL_reg(u64 insn);
295 void SHL_cbuf(u64 insn);
296 void SHL_imm(u64 insn);
297 void SHR_reg(u64 insn);
298 void SHR_cbuf(u64 insn);
299 void SHR_imm(u64 insn);
300 void SSY();
301 void ST(u64 insn);
302 void STG(u64 insn);
303 void STL(u64 insn);
304 void STP(u64 insn);
305 void STS(u64 insn);
306 void SUATOM(u64 insn);
307 void SUATOM_cas(u64 insn);
308 void SULD(u64 insn);
309 void SURED(u64 insn);
310 void SUST(u64 insn);
311 void SYNC(u64 insn);
312 void TEX(u64 insn);
313 void TEX_b(u64 insn);
314 void TEXS(u64 insn);
315 void TLD(u64 insn);
316 void TLD_b(u64 insn);
317 void TLD4(u64 insn);
318 void TLD4_b(u64 insn);
319 void TLD4S(u64 insn);
320 void TLDS(u64 insn);
321 void TMML(u64 insn);
322 void TMML_b(u64 insn);
323 void TXA(u64 insn);
324 void TXD(u64 insn);
325 void TXD_b(u64 insn);
326 void TXQ(u64 insn);
327 void TXQ_b(u64 insn);
328 void VABSDIFF(u64 insn);
329 void VABSDIFF4(u64 insn);
330 void VADD(u64 insn);
331 void VMAD(u64 insn);
332 void VMNMX(u64 insn);
333 void VOTE(u64 insn);
334 void VOTE_vtg(u64 insn);
335 void VSET(u64 insn);
336 void VSETP(u64 insn);
337 void VSHL(u64 insn);
338 void VSHR(u64 insn);
339 void XMAD_reg(u64 insn);
340 void XMAD_rc(u64 insn);
341 void XMAD_cr(u64 insn);
342 void XMAD_imm(u64 insn);
343
344 [[nodiscard]] IR::U32 X(IR::Reg reg);
345 [[nodiscard]] IR::U64 L(IR::Reg reg);
346 [[nodiscard]] IR::F32 F(IR::Reg reg);
347 [[nodiscard]] IR::F64 D(IR::Reg reg);
348
349 void X(IR::Reg dest_reg, const IR::U32& value);
350 void L(IR::Reg dest_reg, const IR::U64& value);
351 void F(IR::Reg dest_reg, const IR::F32& value);
352 void D(IR::Reg dest_reg, const IR::F64& value);
353
354 [[nodiscard]] IR::U32 GetReg8(u64 insn);
355 [[nodiscard]] IR::U32 GetReg20(u64 insn);
356 [[nodiscard]] IR::U32 GetReg39(u64 insn);
357 [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
358 [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
359 [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
360 [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
361 [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
362
363 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
364 [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
365 [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
366 [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
367
368 [[nodiscard]] IR::U32 GetImm20(u64 insn);
369 [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
370 [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
371 [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
372
373 [[nodiscard]] IR::U32 GetImm32(u64 insn);
374 [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
375
376 void SetZFlag(const IR::U1& value);
377 void SetSFlag(const IR::U1& value);
378 void SetCFlag(const IR::U1& value);
379 void SetOFlag(const IR::U1& value);
380
381 void ResetZero();
382 void ResetSFlag();
383 void ResetCFlag();
384 void ResetOFlag();
385};
386
387} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12 bool cc) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const iadd{insn};
18
19 if (sat) {
20 throw NotImplementedException("IADD SAT");
21 }
22 if (x && po) {
23 throw NotImplementedException("IADD X+PO");
24 }
25 // Operand A is always read from here, negated if needed
26 IR::U32 op_a{v.X(iadd.src_a)};
27 if (neg_a) {
28 op_a = v.ir.INeg(op_a);
29 }
30 // Add both operands
31 IR::U32 result{v.ir.IAdd(op_a, op_b)};
32 if (x) {
33 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34 result = v.ir.IAdd(result, carry);
35 }
36 if (po) {
37 // .PO adds one to the result
38 result = v.ir.IAdd(result, v.ir.Imm32(1));
39 }
40 if (cc) {
41 // Store flags
42 // TODO: Does this grab the result pre-PO or after?
43 if (po) {
44 throw NotImplementedException("IADD CC+PO");
45 }
46 // TODO: How does CC behave when X is set?
47 if (x) {
48 throw NotImplementedException("IADD X+CC");
49 }
50 v.SetZFlag(v.ir.GetZeroFromOp(result));
51 v.SetSFlag(v.ir.GetSignFromOp(result));
52 v.SetCFlag(v.ir.GetCarryFromOp(result));
53 v.SetOFlag(v.ir.GetOverflowFromOp(result));
54 }
55 // Store result
56 v.X(iadd.dest_reg, result);
57}
58
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60 union {
61 u64 insn;
62 BitField<43, 1, u64> x;
63 BitField<47, 1, u64> cc;
64 BitField<48, 2, u64> three_for_po;
65 BitField<48, 1, u64> neg_b;
66 BitField<49, 1, u64> neg_a;
67 BitField<50, 1, u64> sat;
68 } const iadd{insn};
69
70 const bool po{iadd.three_for_po == 3};
71 if (!po && iadd.neg_b != 0) {
72 op_b = v.ir.INeg(op_b);
73 }
74 IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
75}
76} // Anonymous namespace
77
78void TranslatorVisitor::IADD_reg(u64 insn) {
79 IADD(*this, insn, GetReg20(insn));
80}
81
82void TranslatorVisitor::IADD_cbuf(u64 insn) {
83 IADD(*this, insn, GetCbuf(insn));
84}
85
86void TranslatorVisitor::IADD_imm(u64 insn) {
87 IADD(*this, insn, GetImm20(insn));
88}
89
90void TranslatorVisitor::IADD32I(u64 insn) {
91 union {
92 u64 raw;
93 BitField<52, 1, u64> cc;
94 BitField<53, 1, u64> x;
95 BitField<54, 1, u64> sat;
96 BitField<55, 2, u64> three_for_po;
97 BitField<56, 1, u64> neg_a;
98 } const iadd32i{insn};
99
100 const bool po{iadd32i.three_for_po == 3};
101 const bool neg_a{!po && iadd32i.neg_a != 0};
102 IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
103}
104
105} // namespace Shader::Maxwell
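
The IADD helpers above fold three optional adjustments into one sum: neg_a negates operand A, X adds the incoming carry flag as an extra one (extended-precision addition), and .PO unconditionally adds one; the unsupported combinations throw. A scalar model of that arithmetic (IaddModel is an illustrative name, not the emitted IR):

#include <cstdint>

// Scalar model of the IADD data path: negate A if requested, add B, then fold
// in the carry flag for .X and the constant one for .PO.
static std::uint32_t IaddModel(std::uint32_t a, std::uint32_t b, bool neg_a,
                               bool carry_in, bool x, bool po) {
    if (neg_a) {
        a = 0u - a; // two's-complement negation, as ir.INeg produces
    }
    std::uint32_t result = a + b;
    if (x) {
        result += carry_in ? 1u : 0u;
    }
    if (po) {
        result += 1u;
    }
    return result;
}
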
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Shift : u64 {
12 None,
13 Right,
14 Left,
15};
16enum class Half : u64 {
17 All,
18 Lower,
19 Upper,
20};
21
22[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
23 constexpr bool is_signed{false};
24 switch (half) {
25 case Half::All:
26 return value;
27 case Half::Lower:
28 return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
29 case Half::Upper:
30 return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
31 }
32 throw NotImplementedException("Invalid half");
33}
34
35[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
36 switch (shift) {
37 case Shift::None:
38 return value;
39 case Shift::Right: {
40 // 33-bit RS IADD3 edge case
41 const IR::U1 edge_case{ir.GetCarryFromOp(value)};
42 const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
43 return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
44 }
45 case Shift::Left:
46 return ir.ShiftLeftLogical(value, ir.Imm32(16));
47 }
48 throw NotImplementedException("Invalid shift");
49}
50
51void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
52 Shift shift = Shift::None) {
53 union {
54 u64 insn;
55 BitField<0, 8, IR::Reg> dest_reg;
56 BitField<47, 1, u64> cc;
57 BitField<48, 1, u64> x;
58 BitField<49, 1, u64> neg_c;
59 BitField<50, 1, u64> neg_b;
60 BitField<51, 1, u64> neg_a;
61 } iadd3{insn};
62
63 if (iadd3.neg_a != 0) {
64 op_a = v.ir.INeg(op_a);
65 }
66 if (iadd3.neg_b != 0) {
67 op_b = v.ir.INeg(op_b);
68 }
69 if (iadd3.neg_c != 0) {
70 op_c = v.ir.INeg(op_c);
71 }
72 IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
73 if (iadd3.x != 0) {
74 // TODO: How does RS behave when X is set?
75 if (shift == Shift::Right) {
76 throw NotImplementedException("IADD3 X+RS");
77 }
78 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
79 lhs_1 = v.ir.IAdd(lhs_1, carry);
80 }
81 const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
82 const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
83
84 v.X(iadd3.dest_reg, result);
85 if (iadd3.cc != 0) {
86 // TODO: How does CC behave when X is set?
87 if (iadd3.x != 0) {
88 throw NotImplementedException("IADD3 X+CC");
89 }
90 v.SetZFlag(v.ir.GetZeroFromOp(result));
91 v.SetSFlag(v.ir.GetSignFromOp(result));
92 v.SetCFlag(v.ir.GetCarryFromOp(result));
93 const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
94 v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
95 }
96}
97} // Anonymous namespace
98
99void TranslatorVisitor::IADD3_reg(u64 insn) {
100 union {
101 u64 insn;
102 BitField<37, 2, Shift> shift;
103 BitField<35, 2, Half> half_a;
104 BitField<33, 2, Half> half_b;
105 BitField<31, 2, Half> half_c;
106 } const iadd3{insn};
107
108 const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
109 const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
110 const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
111 IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
112}
113
114void TranslatorVisitor::IADD3_cbuf(u64 insn) {
115 IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
116}
117
118void TranslatorVisitor::IADD3_imm(u64 insn) {
119 IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
120}
121
122} // namespace Shader::Maxwell
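
IntegerShift's Right case above compensates for IADD3.RS computing its intermediate sum at 33-bit width: if a + b carries out of 32 bits, that lost bit would have become bit 16 after the 16-bit right shift, so 0x10000 is added back when the carry from the add is set. A worked model of the intermediate (Iadd3RightShiftModel is an illustrative name):

#include <cstdint>

// Computes (a + b) >> 16 with the sum held at 33-bit width, which is what the
// carry-based correction in IntegerShift reconstructs on 32-bit IR values.
static std::uint32_t Iadd3RightShiftModel(std::uint32_t a, std::uint32_t b) {
    const std::uint64_t sum33 = static_cast<std::uint64_t>(a) + b;
    return static_cast<std::uint32_t>(sum33 >> 16);
}

// Example: a = 0xffff0000, b = 0x00020000. The 33-bit sum is 0x100010000, so
// the result is 0x00010001; the plain 32-bit path would give 0x00000001, and
// the 0x10000 correction applied on carry restores the difference.
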
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<48, 1, u64> is_signed;
18 BitField<49, 3, CompareOp> compare_op;
19 } const icmp{insn};
20
21 const IR::U32 zero{v.ir.Imm32(0)};
22 const bool is_signed{icmp.is_signed != 0};
23 const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
24
25 const IR::U32 src_reg{v.X(icmp.src_reg)};
26 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
27
28 v.X(icmp.dest_reg, result);
29}
30} // Anonymous namespace
31
32void TranslatorVisitor::ICMP_reg(u64 insn) {
33 ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
34}
35
36void TranslatorVisitor::ICMP_rc(u64 insn) {
37 ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
38}
39
40void TranslatorVisitor::ICMP_cr(u64 insn) {
41 ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
42}
43
44void TranslatorVisitor::ICMP_imm(u64 insn) {
45 ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
46}
47
48} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
19 union {
20 u64 insn;
21 BitField<0, 8, IR::Reg> dest_reg;
22 BitField<8, 8, IR::Reg> src_reg;
23 BitField<39, 3, IR::Pred> pred;
24 BitField<42, 1, u64> neg_pred;
25 BitField<43, 1, u64> x;
26 BitField<44, 1, u64> bf;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<47, 1, u64> cc;
29 BitField<48, 1, u64> is_signed;
30 BitField<49, 3, CompareOp> compare_op;
31 } const iset{insn};
32
33 const IR::U32 src_a{v.X(iset.src_reg)};
34 const bool is_signed{iset.is_signed != 0};
35 const IR::U32 zero{v.ir.Imm32(0)};
36 const bool x{iset.x != 0};
37 const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
38
39 IR::U1 pred{v.ir.GetPred(iset.pred)};
40 if (iset.neg_pred != 0) {
41 pred = v.ir.LogicalNot(pred);
42 }
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
48 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
49
50 v.X(iset.dest_reg, result);
51 if (iset.cc != 0) {
52 if (x) {
53 throw NotImplementedException("ISET.CC + X");
54 }
55 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
56 v.SetZFlag(is_zero);
57 if (iset.bf != 0) {
58 v.ResetSFlag();
59 } else {
60 v.SetSFlag(v.ir.LogicalNot(is_zero));
61 }
62 v.ResetCFlag();
63 v.ResetOFlag();
64 }
65}
66} // Anonymous namespace
67
68void TranslatorVisitor::ISET_reg(u64 insn) {
69 ISET(*this, insn, GetReg20(insn));
70}
71
72void TranslatorVisitor::ISET_cbuf(u64 insn) {
73 ISET(*this, insn, GetCbuf(insn));
74}
75
76void TranslatorVisitor::ISET_imm(u64 insn) {
77 ISET(*this, insn, GetImm20(insn));
78}
79
80} // namespace Shader::Maxwell
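
The tail of ISET above amounts to: combine the comparison with a (possibly negated) predicate, then write either an all-ones mask or the bit pattern of 1.0f when .BF is set. A small sketch of that result selection, assuming an And/Or/Xor ordering for the boolean operation; names are illustrative:

#include <cstdint>

enum class BooleanOp { And, Or, Xor };

uint32_t IsetResultModel(bool cmp_result, bool pred, BooleanOp bop, bool bf) {
    bool combined{};
    switch (bop) {
    case BooleanOp::And: combined = cmp_result && pred; break;
    case BooleanOp::Or:  combined = cmp_result || pred; break;
    case BooleanOp::Xor: combined = cmp_result != pred; break;
    }
    const uint32_t pass = bf ? 0x3f800000u : 0xffffffffu; // 1.0f bit pattern or -1 mask
    return combined ? pass : 0u;
}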
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4#include <limits>
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class FloatFormat : u64 {
13 F16 = 1,
14 F32 = 2,
15 F64 = 3,
16};
17
18enum class IntFormat : u64 {
19 U8 = 0,
20 U16 = 1,
21 U32 = 2,
22 U64 = 3,
23};
24
25union Encoding {
26 u64 raw;
27 BitField<0, 8, IR::Reg> dest_reg;
28 BitField<8, 2, FloatFormat> float_format;
29 BitField<10, 2, IntFormat> int_format;
30 BitField<13, 1, u64> is_signed;
31 BitField<39, 2, FpRounding> fp_rounding;
32 BitField<41, 2, u64> selector;
33 BitField<47, 1, u64> cc;
34 BitField<45, 1, u64> neg;
35 BitField<49, 1, u64> abs;
36};
37
38bool Is64(u64 insn) {
39 return Encoding{insn}.int_format == IntFormat::U64;
40}
41
42int BitSize(FloatFormat format) {
43 switch (format) {
44 case FloatFormat::F16:
45 return 16;
46 case FloatFormat::F32:
47 return 32;
48 case FloatFormat::F64:
49 return 64;
50 }
51 throw NotImplementedException("Invalid float format {}", format);
52}
53
54IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
55 const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
56 const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
57 const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
58 const IR::U1 is_least{v.ir.IEqual(value, least_value)};
59 return IR::U32{v.ir.Select(is_least, value, absolute)};
60}
61
62void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
63 const Encoding i2f{insn};
64 if (i2f.cc != 0) {
65 throw NotImplementedException("I2F CC");
66 }
67 const bool is_signed{i2f.is_signed != 0};
68 int src_bitsize{};
69 switch (i2f.int_format) {
70 case IntFormat::U8:
71 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
72 v.ir.Imm32(8), is_signed);
73 if (i2f.abs != 0) {
74 src = SmallAbs(v, src, 8);
75 }
76 src_bitsize = 8;
77 break;
78 case IntFormat::U16:
79 if (i2f.selector == 1 || i2f.selector == 3) {
80 throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
81 }
82 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
83 v.ir.Imm32(16), is_signed);
84 if (i2f.abs != 0) {
85 src = SmallAbs(v, src, 16);
86 }
87 src_bitsize = 16;
88 break;
89 case IntFormat::U32:
90 case IntFormat::U64:
91 if (i2f.selector != 0) {
92 throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
93 }
94 if (i2f.abs != 0 && is_signed) {
95 src = v.ir.IAbs(src);
96 }
97 src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
98 break;
99 }
100 const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
101 const int dst_bitsize{BitSize(i2f.float_format)};
102 const IR::FpControl fp_control{
103 .no_contraction = false,
104 .rounding = CastFpRounding(i2f.fp_rounding),
105 .fmz_mode = IR::FmzMode::DontCare,
106 };
107 auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
108 static_cast<size_t>(conversion_src_bitsize), is_signed, src,
109 fp_control)};
110 if (i2f.neg != 0) {
111 if (i2f.abs != 0 || !is_signed) {
112 // We know the value is positive
113 value = v.ir.FPNeg(value);
114 } else {
115 // Only negate if the input isn't the lowest value
116 IR::U1 is_least;
117 if (src_bitsize == 64) {
118 is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
119 } else if (src_bitsize == 32) {
120 is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
121 } else {
122 const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
123 is_least = v.ir.IEqual(src, least_value);
124 }
125 value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
126 }
127 }
128 switch (i2f.float_format) {
129 case FloatFormat::F16: {
130 const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
131 v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
132 break;
133 }
134 case FloatFormat::F32:
135 v.F(i2f.dest_reg, value);
136 break;
137 case FloatFormat::F64: {
138 if (!IR::IsAligned(i2f.dest_reg, 2)) {
139 throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
140 }
141 const IR::Value vector{v.ir.UnpackDouble2x32(value)};
142 for (int i = 0; i < 2; ++i) {
143 v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
144 }
145 break;
146 }
147 default:
148 throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
149 }
150}
151} // Anonymous namespace
152
153void TranslatorVisitor::I2F_reg(u64 insn) {
154 if (Is64(insn)) {
155 union {
156 u64 raw;
157 BitField<20, 8, IR::Reg> reg;
158 } const value{insn};
159 const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
160 I2F(*this, insn, ir.PackUint2x32(regs));
161 } else {
162 I2F(*this, insn, GetReg20(insn));
163 }
164}
165
166void TranslatorVisitor::I2F_cbuf(u64 insn) {
167 if (Is64(insn)) {
168 I2F(*this, insn, GetPackedCbuf(insn));
169 } else {
170 I2F(*this, insn, GetCbuf(insn));
171 }
172}
173
174void TranslatorVisitor::I2F_imm(u64 insn) {
175 if (Is64(insn)) {
176 I2F(*this, insn, GetPackedImm20(insn));
177 } else {
178 I2F(*this, insn, GetImm20(insn));
179 }
180}
181
182} // namespace Shader::Maxwell
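
SmallAbs above is the branchless absolute-value idiom (x + mask) ^ mask with mask = x >> (bits - 1), special-casing the most negative sub-word value, which is passed through unchanged. A host-side sketch, assuming the value is already sign-extended into 32 bits and that right shifts of negative values are arithmetic (guaranteed since C++20); the function name is illustrative:

#include <cassert>
#include <cstdint>

int32_t SmallAbsModel(int32_t value, int bits) {
    const int32_t least = -(1 << (bits - 1));  // e.g. -128 for an 8-bit source
    const int32_t mask = value >> (bits - 1);  // all ones if negative, zero otherwise
    const int32_t absolute = (value + mask) ^ mask;
    return value == least ? value : absolute;  // the least value is not negated
}

int main() {
    assert(SmallAbsModel(-5, 8) == 5);
    assert(SmallAbsModel(7, 8) == 7);
    assert(SmallAbsModel(-128, 8) == -128);
}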
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class MaxShift : u64 {
12 U32,
13 Undefined,
14 U64,
15 S64,
16};
17
18IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
19 bool right_shift, bool is_signed) {
20 if (!right_shift) {
21 return ir.ShiftLeftLogical(packed_int, safe_shift);
22 }
23 if (is_signed) {
24 return ir.ShiftRightArithmetic(packed_int, safe_shift);
25 }
26 return ir.ShiftRightLogical(packed_int, safe_shift);
27}
28
29void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
30 bool right_shift) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<0, 8, IR::Reg> lo_bits_reg;
35 BitField<37, 2, MaxShift> max_shift;
36 BitField<47, 1, u64> cc;
37 BitField<48, 2, u64> x_mode;
38 BitField<50, 1, u64> wrap;
39 } const shf{insn};
40
41 if (shf.cc != 0) {
42 throw NotImplementedException("SHF CC");
43 }
44 if (shf.x_mode != 0) {
45 throw NotImplementedException("SHF X Mode");
46 }
47 if (shf.max_shift == MaxShift::Undefined) {
48 throw NotImplementedException("SHF Use of undefined MaxShift value");
49 }
50 const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
51 const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
52 const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
53 const IR::U32 safe_shift{shf.wrap != 0
54 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
55 : v.ir.UMin(shift, max_shift)};
56
57 const bool is_signed{shf.max_shift == MaxShift::S64};
58 const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
59 const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
60
61 const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
62 v.X(shf.dest_reg, result);
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::SHF_l_reg(u64 insn) {
67 SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
68}
69
70void TranslatorVisitor::SHF_l_imm(u64 insn) {
71 SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
72}
73
74void TranslatorVisitor::SHF_r_reg(u64 insn) {
75 SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
76}
77
78void TranslatorVisitor::SHF_r_imm(u64 insn) {
79 SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
80}
81
82} // namespace Shader::Maxwell
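
SHF's data path is a plain 64-bit funnel shift: pack the low and high sources, shift them as one value, and keep the low word for right shifts or the high word for left shifts. A sketch with the shift already clamped or wrapped below 64, mirroring safe_shift above; names are illustrative:

#include <cstdint>

uint32_t FunnelShiftModel(uint32_t low_bits, uint32_t high_bits, uint32_t safe_shift,
                          bool right_shift, bool is_signed) {
    const uint64_t packed = (static_cast<uint64_t>(high_bits) << 32) | low_bits;
    uint64_t shifted;
    if (!right_shift) {
        shifted = packed << safe_shift;
    } else if (is_signed) {
        shifted = static_cast<uint64_t>(static_cast<int64_t>(packed) >> safe_shift);
    } else {
        shifted = packed >> safe_shift;
    }
    // Right shifts keep the low word, left shifts keep the high word
    return right_shift ? static_cast<uint32_t>(shifted)
                       : static_cast<uint32_t>(shifted >> 32);
}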
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 2, u64> mode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const imnmx{insn};
22
23 if (imnmx.cc != 0) {
24 throw NotImplementedException("IMNMX CC");
25 }
26
27 if (imnmx.mode != 0) {
28 throw NotImplementedException("IMNMX.MODE");
29 }
30
31 const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
32 const IR::U32 op_a{v.X(imnmx.src_reg)};
33 IR::U32 min;
34 IR::U32 max;
35
36 if (imnmx.is_signed != 0) {
37 min = IR::U32{v.ir.SMin(op_a, op_b)};
38 max = IR::U32{v.ir.SMax(op_a, op_b)};
39 } else {
40 min = IR::U32{v.ir.UMin(op_a, op_b)};
41 max = IR::U32{v.ir.UMax(op_a, op_b)};
42 }
43 if (imnmx.neg_pred != 0) {
44 std::swap(min, max);
45 }
46
47 const IR::U32 result{v.ir.Select(pred, min, max)};
48 v.X(imnmx.dest_reg, result);
49}
50} // Anonymous namespace
51
52void TranslatorVisitor::IMNMX_reg(u64 insn) {
53 IMNMX(*this, insn, GetReg20(insn));
54}
55
56void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
57 IMNMX(*this, insn, GetCbuf(insn));
58}
59
60void TranslatorVisitor::IMNMX_imm(u64 insn) {
61 IMNMX(*this, insn, GetImm20(insn));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 } const popc{insn};
17
18 const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
19 const IR::U32 result = v.ir.BitCount(operand);
20 v.X(popc.dest_reg, result);
21}
22} // Anonymous namespace
23
24void TranslatorVisitor::POPC_reg(u64 insn) {
25 POPC(*this, insn, GetReg20(insn));
26}
27
28void TranslatorVisitor::POPC_cbuf(u64 insn) {
29 POPC(*this, insn, GetCbuf(insn));
30}
31
32void TranslatorVisitor::POPC_imm(u64 insn) {
33 POPC(*this, insn, GetImm20(insn));
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
12 u64 scale_imm) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> op_a;
17 } const iscadd{insn};
18
19 const bool po{neg_a && neg_b};
20 IR::U32 op_a{v.X(iscadd.op_a)};
21 if (po) {
22 // When PO is present, add one
23 op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
24 } else {
25 // When PO is not present, the bits are interpreted as negation
26 if (neg_a) {
27 op_a = v.ir.INeg(op_a);
28 }
29 if (neg_b) {
30 op_b = v.ir.INeg(op_b);
31 }
32 }
33 // With the operands already processed, scale A
34 const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
35 const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
36
37 const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
38 v.X(iscadd.dest_reg, result);
39
40 if (cc) {
41 v.SetZFlag(v.ir.GetZeroFromOp(result));
42 v.SetSFlag(v.ir.GetSignFromOp(result));
43 const IR::U1 carry{v.ir.GetCarryFromOp(result)};
44 const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
45 v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
46 v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
47 }
48}
49
50void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
51 union {
52 u64 raw;
53 BitField<47, 1, u64> cc;
54 BitField<48, 1, u64> neg_b;
55 BitField<49, 1, u64> neg_a;
56 BitField<39, 5, u64> scale;
57 } const iscadd{insn};
58
59 ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
60}
61
62} // Anonymous namespace
63
64void TranslatorVisitor::ISCADD_reg(u64 insn) {
65 ISCADD(*this, insn, GetReg20(insn));
66}
67
68void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
69 ISCADD(*this, insn, GetCbuf(insn));
70}
71
72void TranslatorVisitor::ISCADD_imm(u64 insn) {
73 ISCADD(*this, insn, GetImm20(insn));
74}
75
76void TranslatorVisitor::ISCADD32I(u64 insn) {
77 union {
78 u64 raw;
79 BitField<52, 1, u64> cc;
80 BitField<53, 5, u64> scale;
81 } const iscadd{insn};
82
83 return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
84}
85
86} // namespace Shader::Maxwell
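
ISCADD computes (A << scale) + B, where the two negate bits either negate their operand or, when both are set (the PO form), add one instead of negating anything. A compact sketch of that arithmetic, ignoring the condition-code path; names are illustrative:

#include <cstdint>

uint32_t IscaddModel(uint32_t op_a, uint32_t op_b, uint32_t scale, bool neg_a, bool neg_b) {
    if (neg_a && neg_b) {
        op_b += 1;  // PO: plus-one instead of negation
    } else {
        if (neg_a) op_a = 0u - op_a;
        if (neg_b) op_b = 0u - op_b;
    }
    return (op_a << scale) + op_b;  // scale is a 5-bit field, so the shift stays in range
}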
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
19 union {
20 u64 raw;
21 BitField<0, 3, IR::Pred> dest_pred_b;
22 BitField<3, 3, IR::Pred> dest_pred_a;
23 BitField<8, 8, IR::Reg> src_reg_a;
24 BitField<39, 3, IR::Pred> bop_pred;
25 BitField<42, 1, u64> neg_bop_pred;
26 BitField<43, 1, u64> x;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<48, 1, u64> is_signed;
29 BitField<49, 3, CompareOp> compare_op;
30 } const isetp{insn};
31
32 const bool is_signed{isetp.is_signed != 0};
33 const bool x{isetp.x != 0};
34 const BooleanOp bop{isetp.bop};
35 const CompareOp compare_op{isetp.compare_op};
36 const IR::U32 op_a{v.X(isetp.src_reg_a)};
37 const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
38 const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
39 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
40 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
41 v.ir.SetPred(isetp.dest_pred_a, result_a);
42 v.ir.SetPred(isetp.dest_pred_b, result_b);
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::ISETP_reg(u64 insn) {
47 ISETP(*this, insn, GetReg20(insn));
48}
49
50void TranslatorVisitor::ISETP_cbuf(u64 insn) {
51 ISETP(*this, insn, GetCbuf(insn));
52}
53
54void TranslatorVisitor::ISETP_imm(u64 insn) {
55 ISETP(*this, insn, GetImm20(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> w;
17 BitField<43, 1, u64> x;
18 BitField<47, 1, u64> cc;
19 } const shl{insn};
20
21 if (shl.x != 0) {
22 throw NotImplementedException("SHL.X");
23 }
24 if (shl.cc != 0) {
25 throw NotImplementedException("SHL.CC");
26 }
27 const IR::U32 base{v.X(shl.src_reg_a)};
28 IR::U32 result;
29 if (shl.w != 0) {
30 // When .W is set, the shift value is wrapped
31 // To emulate this we just have to wrap it ourselves.
32 const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
33 result = v.ir.ShiftLeftLogical(base, shift);
34 } else {
35 // When .W is not set, the shift value is clamped between 0 and 32.
36        // To emulate this we have to keep in mind the special case of a shift by 32, which evaluates to 0.
37 // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
38 //
39 // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
40 // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
41 // or equal to the bit width of the components of Base."
42 //
43 // And on the GLASM specification it is also safe to evaluate out of bounds:
44 //
45 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
46 // "The results of a shift operation ("<<") are undefined if the value of the second operand
47 // is negative, or greater than or equal to the number of bits in the first operand."
48 //
49 // Emphasis on undefined results in contrast to undefined behavior.
50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
54 }
55 v.X(shl.dest_reg, result);
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64 insn) {
60 SHL(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::SHL_cbuf(u64 insn) {
64 SHL(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
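
The two SHL behaviors discussed in the comments reduce to: wrap the shift modulo 32 when .W is set, otherwise treat any shift of 32 or more as producing zero. A sketch of exactly that, which is why the IR selects between the raw shift result and zero instead of relying on out-of-range shift semantics:

#include <cstdint>

uint32_t ShlModel(uint32_t base, uint32_t shift, bool wrap) {
    if (wrap) {
        return base << (shift & 31);        // .W: shift amount wraps
    }
    return shift < 32 ? base << shift : 0;  // clamped: 32 and above yield zero
}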
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> is_wrapped;
17 BitField<40, 1, u64> brev;
18 BitField<43, 1, u64> xmode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const shr{insn};
22
23 if (shr.xmode != 0) {
24 throw NotImplementedException("SHR.XMODE");
25 }
26 if (shr.cc != 0) {
27 throw NotImplementedException("SHR.CC");
28 }
29
30 IR::U32 base{v.X(shr.src_reg_a)};
31 if (shr.brev == 1) {
32 base = v.ir.BitReverse(base);
33 }
34 IR::U32 result;
35 const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
36 if (shr.is_signed == 1) {
37 result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
38 } else {
39 result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
40 }
41
42 if (shr.is_wrapped == 0) {
43 const IR::U32 zero{v.ir.Imm32(0)};
44 const IR::U32 safe_bits{v.ir.Imm32(32)};
45
46 const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
47 const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
48 const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
49 result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
50 }
51 v.X(shr.dest_reg, result);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::SHR_reg(u64 insn) {
56 SHR(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::SHR_cbuf(u64 insn) {
60 SHR(*this, insn, GetCbuf(insn));
61}
62
63void TranslatorVisitor::SHR_imm(u64 insn) {
64 SHR(*this, insn, GetImm20(insn));
65}
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SelectMode : u64 {
12 Default,
13 CLO,
14 CHI,
15 CSFU,
16 CBCC,
17};
18
19enum class Half : u64 {
20 H0, // Least-significant bits (15:0)
21 H1, // Most-significant bits (31:16)
22};
23
24IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
25 const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
26 return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
27}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49        // .PSL shifts the product left by 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61 return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b [15:0] into result's [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64 insn) {
82 union {
83 u64 raw;
84 BitField<35, 1, Half> half_b;
85 BitField<36, 1, u64> psl;
86 BitField<37, 1, u64> mrg;
87 BitField<38, 1, u64> x;
88 BitField<50, 3, SelectMode> select_mode;
89 } const xmad{insn};
90
91 XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
92 xmad.mrg != 0, xmad.x != 0);
93}
94
95void TranslatorVisitor::XMAD_rc(u64 insn) {
96 union {
97 u64 raw;
98 BitField<50, 2, SelectMode> select_mode;
99 BitField<52, 1, Half> half_b;
100 BitField<54, 1, u64> x;
101 } const xmad{insn};
102
103 XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
104 xmad.x != 0);
105}
106
107void TranslatorVisitor::XMAD_cr(u64 insn) {
108 union {
109 u64 raw;
110 BitField<50, 2, SelectMode> select_mode;
111 BitField<52, 1, Half> half_b;
112 BitField<54, 1, u64> x;
113 BitField<55, 1, u64> psl;
114 BitField<56, 1, u64> mrg;
115 } const xmad{insn};
116
117 XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
118 xmad.mrg != 0, xmad.x != 0);
119}
120
121void TranslatorVisitor::XMAD_imm(u64 insn) {
122 union {
123 u64 raw;
124 BitField<20, 16, u64> src_b;
125 BitField<36, 1, u64> psl;
126 BitField<37, 1, u64> mrg;
127 BitField<38, 1, u64> x;
128 BitField<50, 3, SelectMode> select_mode;
129 } const xmad{insn};
130
131 XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
132 Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
133}
134
135} // namespace Shader::Maxwell
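
XMAD multiplies two 16-bit halves into a 32-bit product and adds a third operand, with .PSL shifting the product left by 16, .CBCC folding src_b into the addend, and .MRG splicing src_b's low half into the result's high half. A sketch of that data path, ignoring CC/X, CSFU and the CLO/CHI half-selects on C; the halves are assumed to be already sign- or zero-extended to 32 bits and names are illustrative:

#include <cstdint>

uint32_t XmadModel(uint32_t half_a, uint32_t half_b, uint32_t src_b, uint32_t src_c,
                   bool psl, bool mrg, bool cbcc) {
    uint32_t product = half_a * half_b;
    if (psl) {
        product <<= 16;  // .PSL
    }
    const uint32_t op_c = cbcc ? (src_b << 16) + src_c : src_c;  // .CBCC
    uint32_t result = product + op_c;
    if (mrg) {
        result = (result & 0x0000ffffu) | (src_b << 16);  // .MRG
    }
    return result;
}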
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class IntegerWidth : u64 {
12 Byte,
13 Short,
14 Word,
15};
16
17[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
18 switch (width) {
19 case IntegerWidth::Byte:
20 return ir.Imm32(8);
21 case IntegerWidth::Short:
22 return ir.Imm32(16);
23 case IntegerWidth::Word:
24 return ir.Imm32(32);
25 default:
26 throw NotImplementedException("Invalid width {}", width);
27 }
28}
29
30[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
31 IntegerWidth dst_width) {
32 const IR::U32 zero{ir.Imm32(0)};
33 const IR::U32 count{WidthSize(ir, dst_width)};
34 return ir.BitFieldExtract(src, zero, count, false);
35}
36
37[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
38 bool dst_signed, bool src_signed) {
39 IR::U32 min{};
40 IR::U32 max{};
41 const IR::U32 zero{ir.Imm32(0)};
42 switch (dst_width) {
43 case IntegerWidth::Byte:
44 min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
45 max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
46 break;
47 case IntegerWidth::Short:
48 min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
49 max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
50 break;
51 case IntegerWidth::Word:
52 min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
53 max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
54 break;
55 default:
56 throw NotImplementedException("Invalid width {}", dst_width);
57 }
58 const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
59 return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
60}
61
62void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
63 union {
64 u64 insn;
65 BitField<0, 8, IR::Reg> dest_reg;
66 BitField<8, 2, IntegerWidth> dst_fmt;
67 BitField<12, 1, u64> dst_fmt_sign;
68 BitField<10, 2, IntegerWidth> src_fmt;
69 BitField<13, 1, u64> src_fmt_sign;
70 BitField<41, 3, u64> selector;
71 BitField<45, 1, u64> neg;
72 BitField<47, 1, u64> cc;
73 BitField<49, 1, u64> abs;
74 BitField<50, 1, u64> sat;
75 } const i2i{insn};
76
77 if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
78 throw NotImplementedException("16-bit source format incompatible with selector {}",
79 i2i.selector);
80 }
81 if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
82 throw NotImplementedException("32-bit source format incompatible with selector {}",
83 i2i.selector);
84 }
85
86 const s32 selector{static_cast<s32>(i2i.selector)};
87 const IR::U32 offset{v.ir.Imm32(selector * 8)};
88 const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
89 const bool src_signed{i2i.src_fmt_sign != 0};
90 const bool dst_signed{i2i.dst_fmt_sign != 0};
91 const bool sat{i2i.sat != 0};
92
93 IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
94 if (i2i.abs != 0) {
95 src_values = v.ir.IAbs(src_values);
96 }
97 if (i2i.neg != 0) {
98 src_values = v.ir.INeg(src_values);
99 }
100 const IR::U32 result{
101 sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
102 : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
103
104 v.X(i2i.dest_reg, result);
105 if (i2i.cc != 0) {
106 v.SetZFlag(v.ir.GetZeroFromOp(result));
107 v.SetSFlag(v.ir.GetSignFromOp(result));
108 v.ResetCFlag();
109 v.ResetOFlag();
110 }
111}
112} // Anonymous namespace
113
114void TranslatorVisitor::I2I_reg(u64 insn) {
115 I2I(*this, insn, GetReg20(insn));
116}
117
118void TranslatorVisitor::I2I_cbuf(u64 insn) {
119 I2I(*this, insn, GetCbuf(insn));
120}
121
122void TranslatorVisitor::I2I_imm(u64 insn) {
123 I2I(*this, insn, GetImm20(insn));
124}
125
126} // namespace Shader::Maxwell
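
SaturateInteger clamps the (possibly negated) source into the numeric range of the destination width, first clamping negative values to zero when the destination is unsigned. Two representative cases as a sketch; the remaining width and sign combinations follow the same pattern with the constants from the switch above, and the names are illustrative:

#include <algorithm>
#include <cstdint>

// Signed source saturated to a signed 8-bit destination
uint32_t SaturateToS8(uint32_t src) {
    const int32_t value = static_cast<int32_t>(src);
    return static_cast<uint32_t>(std::clamp<int32_t>(value, -128, 127));
}

// Signed or unsigned source saturated to an unsigned 8-bit destination
uint32_t SaturateToU8(uint32_t src, bool src_signed) {
    const int64_t value = src_signed ? static_cast<int32_t>(src) : static_cast<int64_t>(src);
    return static_cast<uint32_t>(std::clamp<int64_t>(value, 0, 0xff));
}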
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 Patch,
14 Prim,
15 Attr,
16};
17
18enum class Shift : u64 {
19 Default,
20 U16,
21 B32,
22};
23
24} // Anonymous namespace
25
26void TranslatorVisitor::ISBERD(u64 insn) {
27 union {
28 u64 raw;
29 BitField<0, 8, IR::Reg> dest_reg;
30 BitField<8, 8, IR::Reg> src_reg;
31 BitField<31, 1, u64> skew;
32 BitField<32, 1, u64> o;
33 BitField<33, 2, Mode> mode;
34 BitField<47, 2, Shift> shift;
35 } const isberd{insn};
36
37 if (isberd.skew != 0) {
38 throw NotImplementedException("SKEW");
39 }
40 if (isberd.o != 0) {
41 throw NotImplementedException("O");
42 }
43 if (isberd.mode != Mode::Default) {
44 throw NotImplementedException("Mode {}", isberd.mode.Value());
45 }
46 if (isberd.shift != Shift::Default) {
47 throw NotImplementedException("Shift {}", isberd.shift.Value());
48 }
49 LOG_WARNING(Shader, "(STUBBED) called");
50 X(isberd.dest_reg, X(isberd.src_reg));
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
9
10namespace Shader::Maxwell {
11using namespace LDC;
12namespace {
13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
14 const IR::U32& reg, const IR::U32& imm) {
15 switch (mode) {
16 case Mode::Default:
17 return {imm_index, ir.IAdd(reg, imm)};
18 default:
19 break;
20 }
21 throw NotImplementedException("Mode {}", mode);
22}
23} // Anonymous namespace
24
25void TranslatorVisitor::LDC(u64 insn) {
26 const Encoding ldc{insn};
27 const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
28 const IR::U32 reg{X(ldc.src_reg)};
29 const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
30 const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
31 switch (ldc.size) {
32 case Size::U8:
33 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
34 break;
35 case Size::S8:
36 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
37 break;
38 case Size::U16:
39 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
40 break;
41 case Size::S16:
42 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
43 break;
44 case Size::B32:
45 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
46 break;
47 case Size::B64: {
48 if (!IR::IsAligned(ldc.dest_reg, 2)) {
49 throw NotImplementedException("Unaligned destination register");
50 }
51 const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
52 for (int i = 0; i < 2; ++i) {
53 X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
54 }
55 break;
56 }
57 default:
58 throw NotImplementedException("Invalid size {}", ldc.size.Value());
59 }
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/reg.h"
10
11namespace Shader::Maxwell::LDC {
12
13enum class Mode : u64 {
14 Default,
15 IL,
16 IS,
17 ISL,
18};
19
20enum class Size : u64 {
21 U8,
22 S8,
23 U16,
24 S16,
25 B32,
26 B64,
27};
28
29union Encoding {
30 u64 raw;
31 BitField<0, 8, IR::Reg> dest_reg;
32 BitField<8, 8, IR::Reg> src_reg;
33 BitField<20, 16, s64> offset;
34 BitField<36, 5, u64> index;
35 BitField<44, 2, Mode> mode;
36 BitField<48, 3, Size> size;
37};
38
39} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
12 bool neg, bool x) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> offset_lo_reg;
17 BitField<47, 1, u64> cc;
18 BitField<48, 3, IR::Pred> pred;
19 } const lea{insn};
20
21 if (x) {
22 throw NotImplementedException("LEA.HI X");
23 }
24 if (lea.pred != IR::Pred::PT) {
25 throw NotImplementedException("LEA.HI Pred");
26 }
27 if (lea.cc != 0) {
28 throw NotImplementedException("LEA.HI CC");
29 }
30
31 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
32 const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
33 const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
34
35 const s32 hi_scale{32 - static_cast<s32>(scale)};
36 const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
37 const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
38
39 IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
40 v.X(lea.dest_reg, result);
41}
42
43void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
44 union {
45 u64 insn;
46 BitField<0, 8, IR::Reg> dest_reg;
47 BitField<8, 8, IR::Reg> offset_lo_reg;
48 BitField<39, 5, u64> scale;
49 BitField<45, 1, u64> neg;
50 BitField<46, 1, u64> x;
51 BitField<47, 1, u64> cc;
52 BitField<48, 3, IR::Pred> pred;
53 } const lea{insn};
54 if (lea.x != 0) {
55 throw NotImplementedException("LEA.LO X");
56 }
57 if (lea.pred != IR::Pred::PT) {
58 throw NotImplementedException("LEA.LO Pred");
59 }
60 if (lea.cc != 0) {
61 throw NotImplementedException("LEA.LO CC");
62 }
63
64 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
65 const s32 scale{static_cast<s32>(lea.scale)};
66 const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
67 const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
68
69 IR::U32 result{v.ir.IAdd(base, scaled_offset)};
70 v.X(lea.dest_reg, result);
71}
72} // Anonymous namespace
73
74void TranslatorVisitor::LEA_hi_reg(u64 insn) {
75 union {
76 u64 insn;
77 BitField<28, 5, u64> scale;
78 BitField<37, 1, u64> neg;
79 BitField<38, 1, u64> x;
80 } const lea{insn};
81
82 LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
83}
84
85void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
86 union {
87 u64 insn;
88 BitField<51, 5, u64> scale;
89 BitField<56, 1, u64> neg;
90 BitField<57, 1, u64> x;
91 } const lea{insn};
92
93 LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
94}
95
96void TranslatorVisitor::LEA_lo_reg(u64 insn) {
97 LEA_lo(*this, insn, GetReg20(insn));
98}
99
100void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
101 LEA_lo(*this, insn, GetCbuf(insn));
102}
103
104void TranslatorVisitor::LEA_lo_imm(u64 insn) {
105 LEA_lo(*this, insn, GetImm20(insn));
106}
107
108} // namespace Shader::Maxwell
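
LEA.HI forms a 64-bit offset from the low and high registers, optionally negates it, shifts it right by (32 - scale), and adds the low 32 bits of that to the base; LEA.LO is simply base + (offset_lo << scale). A sketch mirroring the IR sequence above, assuming scale stays within its 5-bit range so the shift amount is valid; names are illustrative:

#include <cstdint>

uint32_t LeaHiModel(uint32_t base, uint32_t offset_lo, uint32_t offset_hi,
                    uint32_t scale, bool neg) {
    uint64_t offset = (static_cast<uint64_t>(offset_hi) << 32) | offset_lo;
    if (neg) {
        offset = 0 - offset;
    }
    const uint64_t scaled = offset >> (32 - scale);  // a 1..32 bit shift on a 64-bit value
    return base + static_cast<uint32_t>(scaled);
}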
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/ir_emitter.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Size : u64 {
15 B32,
16 B64,
17 B96,
18 B128,
19};
20
21enum class InterpolationMode : u64 {
22 Pass,
23 Multiply,
24 Constant,
25 Sc,
26};
27
28enum class SampleMode : u64 {
29 Default,
30 Centroid,
31 Offset,
32};
33
34u32 NumElements(Size size) {
35 switch (size) {
36 case Size::B32:
37 return 1;
38 case Size::B64:
39 return 2;
40 case Size::B96:
41 return 3;
42 case Size::B128:
43 return 4;
44 }
45 throw InvalidArgument("Invalid size {}", size);
46}
47
48template <typename F>
49void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
50 const IR::U32 index_value{v.X(index_reg)};
51 for (u32 element = 0; element < num_elements; ++element) {
52 const IR::U32 final_offset{
53 element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
54 f(element, final_offset);
55 }
56}
57
58} // Anonymous namespace
59
60void TranslatorVisitor::ALD(u64 insn) {
61 union {
62 u64 raw;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> index_reg;
65 BitField<20, 10, u64> absolute_offset;
66 BitField<20, 11, s64> relative_offset;
67 BitField<39, 8, IR::Reg> vertex_reg;
68 BitField<32, 1, u64> o;
69 BitField<31, 1, u64> patch;
70 BitField<47, 2, Size> size;
71 } const ald{insn};
72
73 const u64 offset{ald.absolute_offset.Value()};
74 if (offset % 4 != 0) {
75 throw NotImplementedException("Unaligned absolute offset {}", offset);
76 }
77 const IR::U32 vertex{X(ald.vertex_reg)};
78 const u32 num_elements{NumElements(ald.size)};
79 if (ald.index_reg == IR::Reg::RZ) {
80 for (u32 element = 0; element < num_elements; ++element) {
81 if (ald.patch != 0) {
82 const IR::Patch patch{offset / 4 + element};
83 F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
84 } else {
85 const IR::Attribute attr{offset / 4 + element};
86 F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
87 }
88 }
89 return;
90 }
91 if (ald.patch != 0) {
92 throw NotImplementedException("Indirect patch read");
93 }
94 HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
95 F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
96 });
97}
98
99void TranslatorVisitor::AST(u64 insn) {
100 union {
101 u64 raw;
102 BitField<0, 8, IR::Reg> src_reg;
103 BitField<8, 8, IR::Reg> index_reg;
104 BitField<20, 10, u64> absolute_offset;
105 BitField<20, 11, s64> relative_offset;
106 BitField<31, 1, u64> patch;
107 BitField<39, 8, IR::Reg> vertex_reg;
108 BitField<47, 2, Size> size;
109 } const ast{insn};
110
114 const u64 offset{ast.absolute_offset.Value()};
115 if (offset % 4 != 0) {
116 throw NotImplementedException("Unaligned absolute offset {}", offset);
117 }
118 const IR::U32 vertex{X(ast.vertex_reg)};
119 const u32 num_elements{NumElements(ast.size)};
120 if (ast.index_reg == IR::Reg::RZ) {
121 for (u32 element = 0; element < num_elements; ++element) {
122 if (ast.patch != 0) {
123 const IR::Patch patch{offset / 4 + element};
124 ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
125 } else {
126 const IR::Attribute attr{offset / 4 + element};
127 ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
128 }
129 }
130 return;
131 }
132 if (ast.patch != 0) {
133 throw NotImplementedException("Indexed tessellation patch store");
134 }
135 HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
136 ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
137 });
138}
139
140void TranslatorVisitor::IPA(u64 insn) {
141 // IPA is the instruction used to read varyings from a fragment shader.
142 // gl_FragCoord is mapped to the gl_Position attribute.
143 // It yields unknown results when used outside of the fragment shader stage.
144 union {
145 u64 raw;
146 BitField<0, 8, IR::Reg> dest_reg;
147 BitField<8, 8, IR::Reg> index_reg;
148 BitField<20, 8, IR::Reg> multiplier;
149 BitField<30, 8, IR::Attribute> attribute;
150 BitField<38, 1, u64> idx;
151 BitField<51, 1, u64> sat;
152 BitField<52, 2, SampleMode> sample_mode;
153 BitField<54, 2, InterpolationMode> interpolation_mode;
154 } const ipa{insn};
155
156 // Indexed IPAs are used for indexed varyings.
157 // For example:
158 //
159 // in vec4 colors[4];
160 // uniform int idx;
161 // void main() {
162 // gl_FragColor = colors[idx];
163 // }
164 const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
165 const IR::Attribute attribute{ipa.attribute};
166 IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
167 : ir.GetAttribute(attribute)};
168 if (IR::IsGeneric(attribute)) {
169 const ProgramHeader& sph{env.SPH()};
170 const u32 attr_index{IR::GenericAttributeIndex(attribute)};
171 const u32 element{static_cast<u32>(attribute) % 4};
172 const std::array input_map{sph.ps.GenericInputMap(attr_index)};
173 const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
174 if (is_perspective) {
175 const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
176 value = ir.FPMul(value, position_w);
177 }
178 }
179 if (ipa.interpolation_mode == InterpolationMode::Multiply) {
180 value = ir.FPMul(value, F(ipa.multiplier));
181 }
182
183    // Saturated IPAs are generally generated from clamped varyings.
184 // For example: clamp(some_varying, 0.0, 1.0)
185 const bool is_saturated{ipa.sat != 0};
186 if (is_saturated) {
187 if (attribute == IR::Attribute::FrontFace) {
188 throw NotImplementedException("IPA.SAT on FrontFace");
189 }
190 value = ir.FPSaturate(value);
191 }
192
193 F(ipa.dest_reg, value);
194}
195
196} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Size : u64 {
12 U8,
13 S8,
14 U16,
15 S16,
16 B32,
17 B64,
18 B128,
19};
20
21IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
22 union {
23 u64 raw;
24 BitField<8, 8, IR::Reg> offset_reg;
25 BitField<20, 24, u64> absolute_offset;
26 BitField<20, 24, s64> relative_offset;
27 } const encoding{insn};
28
29 if (encoding.offset_reg == IR::Reg::RZ) {
30 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
31 } else {
32 const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
33 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
34 }
35}
36
37std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
38 const IR::U32 offset{Offset(v, insn)};
39 if (offset.IsImmediate()) {
40 return {v.ir.Imm32(offset.U32() / 4), offset};
41 } else {
42 return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
43 }
44}
45
46std::pair<int, bool> GetSize(u64 insn) {
47 union {
48 u64 raw;
49 BitField<48, 3, Size> size;
50 } const encoding{insn};
51
52 switch (encoding.size) {
53 case Size::U8:
54 return {8, false};
55 case Size::S8:
56 return {8, true};
57 case Size::U16:
58 return {16, false};
59 case Size::S16:
60 return {16, true};
61 case Size::B32:
62 return {32, false};
63 case Size::B64:
64 return {64, false};
65 case Size::B128:
66 return {128, false};
67 default:
68 throw NotImplementedException("Invalid size {}", encoding.size.Value());
69 }
70}
71
72IR::Reg Reg(u64 insn) {
73 union {
74 u64 raw;
75 BitField<0, 8, IR::Reg> reg;
76 } const encoding{insn};
77
78 return encoding.reg;
79}
80
81IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
82 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
83}
84
85IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
86 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
87}
88
89IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
90 const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
91 const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
92 return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
93}
94} // Anonymous namespace
95
96void TranslatorVisitor::LDL(u64 insn) {
97 const auto [word_offset, offset]{WordOffset(*this, insn)};
98 const IR::U32 word{LoadLocal(*this, word_offset, offset)};
99 const IR::Reg dest{Reg(insn)};
100 const auto [bit_size, is_signed]{GetSize(insn)};
101 switch (bit_size) {
102 case 8: {
103 const IR::U32 bit{ByteOffset(ir, offset)};
104 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
105 break;
106 }
107 case 16: {
108 const IR::U32 bit{ShortOffset(ir, offset)};
109 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
110 break;
111 }
112 case 32:
113 case 64:
114 case 128:
115 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
116 throw NotImplementedException("Unaligned destination register {}", dest);
117 }
118 X(dest, word);
119 for (int i = 1; i < bit_size / 32; ++i) {
120 const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
121 const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
122 X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
123 }
124 break;
125 }
126}
127
128void TranslatorVisitor::LDS(u64 insn) {
129 const IR::U32 offset{Offset(*this, insn)};
130 const IR::Reg dest{Reg(insn)};
131 const auto [bit_size, is_signed]{GetSize(insn)};
132 const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
133 switch (bit_size) {
134 case 8:
135 case 16:
136 case 32:
137 X(dest, IR::U32{value});
138 break;
139 case 64:
140 case 128:
141 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
142 throw NotImplementedException("Unaligned destination register {}", dest);
143 }
144 for (int element = 0; element < bit_size / 32; ++element) {
145 X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
146 }
147 break;
148 }
149}
150
151void TranslatorVisitor::STL(u64 insn) {
152 const auto [word_offset, offset]{WordOffset(*this, insn)};
153 if (offset.IsImmediate()) {
154 // TODO: Support storing out of bounds at runtime
155 if (offset.U32() >= env.LocalMemorySize()) {
156 LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
157 offset.U32(), env.LocalMemorySize());
158 return;
159 }
160 }
161 const IR::Reg reg{Reg(insn)};
162 const IR::U32 src{X(reg)};
163 const int bit_size{GetSize(insn).first};
164 switch (bit_size) {
165 case 8: {
166 const IR::U32 bit{ByteOffset(ir, offset)};
167 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
168 ir.WriteLocal(word_offset, value);
169 break;
170 }
171 case 16: {
172 const IR::U32 bit{ShortOffset(ir, offset)};
173 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
174 ir.WriteLocal(word_offset, value);
175 break;
176 }
177 case 32:
178 case 64:
179 case 128:
180 if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
181 throw NotImplementedException("Unaligned source register");
182 }
183 ir.WriteLocal(word_offset, src);
184 for (int i = 1; i < bit_size / 32; ++i) {
185 ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
186 }
187 break;
188 }
189}
190
191void TranslatorVisitor::STS(u64 insn) {
192 const IR::U32 offset{Offset(*this, insn)};
193 const IR::Reg reg{Reg(insn)};
194 const int bit_size{GetSize(insn).first};
195 switch (bit_size) {
196 case 8:
197 case 16:
198 case 32:
199 ir.WriteShared(bit_size, offset, X(reg));
200 break;
201 case 64:
202 if (!IR::IsAligned(reg, 2)) {
203 throw NotImplementedException("Unaligned source register {}", reg);
204 }
205 ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
206 break;
207 case 128: {
208 if (!IR::IsAligned(reg, 2)) {
209 throw NotImplementedException("Unaligned source register {}", reg);
210 }
211 const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
212 ir.WriteShared(128, offset, vector);
213 break;
214 }
215 }
216}
217
218} // namespace Shader::Maxwell
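
ByteOffset and ShortOffset convert a byte address into the bit position of that byte or halfword inside its 32-bit local-memory word: (offset << 3) & 24 and (offset << 3) & 16 respectively. A tiny sketch with the expected values:

#include <cassert>
#include <cstdint>

uint32_t ByteBit(uint32_t byte_offset)  { return (byte_offset << 3) & 24; }  // 0, 8, 16 or 24
uint32_t ShortBit(uint32_t byte_offset) { return (byte_offset << 3) & 16; }  // 0 or 16

int main() {
    assert(ByteBit(5) == 8);    // byte 1 of its word
    assert(ByteBit(7) == 24);   // byte 3 of its word
    assert(ShortBit(6) == 16);  // upper halfword
}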
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class LoadSize : u64 {
14 U8, // Zero-extend
15 S8, // Sign-extend
16 U16, // Zero-extend
17 S16, // Sign-extend
18 B32,
19 B64,
20 B128,
21 U128, // ???
22};
23
24enum class StoreSize : u64 {
25 U8, // Zero-extend
26 S8, // Sign-extend
27 U16, // Zero-extend
28 S16, // Sign-extend
29 B32,
30 B64,
31 B128,
32};
33
34// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
35enum class LoadCache : u64 {
36 CA, // Cache at all levels, likely to be accessed again
37 CG, // Cache at global level (cache in L2 and below, not L1)
38 CI, // ???
39 CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
40};
41
42// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
43enum class StoreCache : u64 {
44 WB, // Cache write-back all coherent levels
45 CG, // Cache at global level
46 CS, // Cache streaming, likely to be accessed once
47 WT, // Cache write-through (to system memory)
48};
49
50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
51 union {
52 u64 raw;
53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
56 BitField<45, 1, u64> e;
57 } const mem{insn};
58
59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register");
66 }
67 // Pack two registers to build the 64-bit address
68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::LDG(u64 insn) {
84 // LDG loads global memory into registers
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<46, 2, LoadCache> cache;
89 BitField<48, 3, LoadSize> size;
90 } const ldg{insn};
91
92 // Pointer to load data from
93 const IR::U64 address{Address(*this, insn)};
94 const IR::Reg dest_reg{ldg.dest_reg};
95 switch (ldg.size) {
96 case LoadSize::U8:
97 X(dest_reg, ir.LoadGlobalU8(address));
98 break;
99 case LoadSize::S8:
100 X(dest_reg, ir.LoadGlobalS8(address));
101 break;
102 case LoadSize::U16:
103 X(dest_reg, ir.LoadGlobalU16(address));
104 break;
105 case LoadSize::S16:
106 X(dest_reg, ir.LoadGlobalS16(address));
107 break;
108 case LoadSize::B32:
109 X(dest_reg, ir.LoadGlobal32(address));
110 break;
111 case LoadSize::B64: {
112 if (!IR::IsAligned(dest_reg, 2)) {
113 throw NotImplementedException("Unaligned data registers");
114 }
115 const IR::Value vector{ir.LoadGlobal64(address)};
116 for (int i = 0; i < 2; ++i) {
117 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
118 }
119 break;
120 }
121 case LoadSize::B128:
122 case LoadSize::U128: {
123 if (!IR::IsAligned(dest_reg, 4)) {
124 throw NotImplementedException("Unaligned data registers");
125 }
126 const IR::Value vector{ir.LoadGlobal128(address)};
127 for (int i = 0; i < 4; ++i) {
128 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
129 }
130 break;
131 }
132 default:
133 throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
134 }
135}
136
137void TranslatorVisitor::STG(u64 insn) {
138 // STG stores registers into global memory.
139 union {
140 u64 raw;
141 BitField<0, 8, IR::Reg> data_reg;
142 BitField<46, 2, StoreCache> cache;
143 BitField<48, 3, StoreSize> size;
144 } const stg{insn};
145
146 // Pointer to store data into
147 const IR::U64 address{Address(*this, insn)};
148 const IR::Reg data_reg{stg.data_reg};
149 switch (stg.size) {
150 case StoreSize::U8:
151 ir.WriteGlobalU8(address, X(data_reg));
152 break;
153 case StoreSize::S8:
154 ir.WriteGlobalS8(address, X(data_reg));
155 break;
156 case StoreSize::U16:
157 ir.WriteGlobalU16(address, X(data_reg));
158 break;
159 case StoreSize::S16:
160 ir.WriteGlobalS16(address, X(data_reg));
161 break;
162 case StoreSize::B32:
163 ir.WriteGlobal32(address, X(data_reg));
164 break;
165 case StoreSize::B64: {
166 if (!IR::IsAligned(data_reg, 2)) {
167 throw NotImplementedException("Unaligned data registers");
168 }
169 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
170 ir.WriteGlobal64(address, vector);
171 break;
172 }
173 case StoreSize::B128:
174 if (!IR::IsAligned(data_reg, 4)) {
175 throw NotImplementedException("Unaligned data registers");
176 }
177 const IR::Value vector{
178 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
179 ir.WriteGlobal128(address, vector);
180 break;
181 }
182}
183
184} // namespace Shader::Maxwell
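
A note on Address() above: the 24-bit immediate acts as a signed displacement from the register pair, but as a zero-extended absolute address when the base register is RZ. A small standalone sketch of that distinction; SignExtend24 is a hypothetical helper, not part of the translator:

#include <cstdint>
#include <cstdio>

std::int64_t SignExtend24(std::uint64_t raw) {
    return static_cast<std::int64_t>(raw << 40) >> 40; // replicate bit 23 upwards
}

int main() {
    const std::uint64_t field = 0xFFFFFF; // all 24 offset bits set
    std::printf("relative offset: %lld\n", static_cast<long long>(SignExtend24(field)));  // -1
    std::printf("absolute (RZ) address: %llu\n", static_cast<unsigned long long>(field)); // 16777215
}
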
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class LogicalOp : u64 {
13 AND,
14 OR,
15 XOR,
16 PASS_B,
17};
18
19[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
20 const IR::U32& operand_2, LogicalOp op) {
21 switch (op) {
22 case LogicalOp::AND:
23 return ir.BitwiseAnd(operand_1, operand_2);
24 case LogicalOp::OR:
25 return ir.BitwiseOr(operand_1, operand_2);
26 case LogicalOp::XOR:
27 return ir.BitwiseXor(operand_1, operand_2);
28 case LogicalOp::PASS_B:
29 return operand_2;
30 default:
31 throw NotImplementedException("Invalid Logical operation {}", op);
32 }
33}
34
35void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
36 LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
37 IR::Pred dest_pred = IR::Pred::PT) {
38 union {
39 u64 insn;
40 BitField<0, 8, IR::Reg> dest_reg;
41 BitField<8, 8, IR::Reg> src_reg;
42 } const lop{insn};
43
44 if (x) {
45 throw NotImplementedException("X");
46 }
47 IR::U32 op_a{v.X(lop.src_reg)};
48 if (inv_a != 0) {
49 op_a = v.ir.BitwiseNot(op_a);
50 }
51 if (inv_b != 0) {
52 op_b = v.ir.BitwiseNot(op_b);
53 }
54
55 const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
56 if (pred_op) {
57 const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
58 v.ir.SetPred(dest_pred, pred_result);
59 }
60 if (cc) {
61 if (bit_op == LogicalOp::PASS_B) {
62 v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
63 v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
64 } else {
65 v.SetZFlag(v.ir.GetZeroFromOp(result));
66 v.SetSFlag(v.ir.GetSignFromOp(result));
67 }
68 v.ResetCFlag();
69 v.ResetOFlag();
70 }
71 v.X(lop.dest_reg, result);
72}
73
74void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
75 union {
76 u64 insn;
77 BitField<39, 1, u64> inv_a;
78 BitField<40, 1, u64> inv_b;
79 BitField<41, 2, LogicalOp> bit_op;
80 BitField<43, 1, u64> x;
81 BitField<44, 2, PredicateOp> pred_op;
82 BitField<47, 1, u64> cc;
83 BitField<48, 3, IR::Pred> dest_pred;
84 } const lop{insn};
85
86 LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
87 lop.pred_op, lop.dest_pred);
88}
89} // Anonymous namespace
90
91void TranslatorVisitor::LOP_reg(u64 insn) {
92 LOP(*this, insn, GetReg20(insn));
93}
94
95void TranslatorVisitor::LOP_cbuf(u64 insn) {
96 LOP(*this, insn, GetCbuf(insn));
97}
98
99void TranslatorVisitor::LOP_imm(u64 insn) {
100 LOP(*this, insn, GetImm20(insn));
101}
102
103void TranslatorVisitor::LOP32I(u64 insn) {
104 union {
105 u64 raw;
106 BitField<53, 2, LogicalOp> bit_op;
107 BitField<57, 1, u64> x;
108 BitField<52, 1, u64> cc;
109 BitField<55, 1, u64> inv_a;
110 BitField<56, 1, u64> inv_b;
111 } const lop32i{insn};
112
113 LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
114 lop32i.inv_b != 0, lop32i.bit_op);
115}
116} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)};
17 const IR::U32 not_a{ir.BitwiseNot(a)};
18 const IR::U32 not_b{ir.BitwiseNot(b)};
19 const IR::U32 not_c{ir.BitwiseNot(c)};
20 if (ttbl & 0x01) {
21 // r |= ~a & ~b & ~c;
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
24 r = ir.BitwiseOr(r, rhs);
25 }
26 if (ttbl & 0x02) {
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69}
70
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
72 union {
73 u64 insn;
74 BitField<0, 8, IR::Reg> dest_reg;
75 BitField<8, 8, IR::Reg> src_reg;
76 BitField<47, 1, u64> cc;
77 } const lop3{insn};
78
79 if (lop3.cc != 0) {
80 throw NotImplementedException("LOP3 CC");
81 }
82
83 const IR::U32 op_a{v.X(lop3.src_reg)};
84 const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
85 v.X(lop3.dest_reg, result);
86 return result;
87}
88
89u64 GetLut48(u64 insn) {
90 union {
91 u64 raw;
92 BitField<48, 8, u64> lut;
93 } const lut{insn};
94 return lut.lut;
95}
96} // Anonymous namespace
97
98void TranslatorVisitor::LOP3_reg(u64 insn) {
99 union {
100 u64 insn;
101 BitField<28, 8, u64> lut;
102 BitField<38, 1, u64> x;
103 BitField<36, 2, PredicateOp> pred_op;
104 BitField<48, 3, IR::Pred> pred;
105 } const lop3{insn};
106
107 if (lop3.x != 0) {
108 throw NotImplementedException("LOP3 X");
109 }
110 const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
111 const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
112 ir.SetPred(lop3.pred, pred_result);
113}
114
115void TranslatorVisitor::LOP3_cbuf(u64 insn) {
116 LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
117}
118
119void TranslatorVisitor::LOP3_imm(u64 insn) {
120 LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
121}
122} // namespace Shader::Maxwell
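
For reference on ApplyLUT above: bit i of the truth table gives the result for input combination i = (a << 2) | (b << 1) | c, so the immediate for any three-input expression can be derived by evaluating that expression over fixed 8-bit masks. A minimal sketch; the constants and the example expression are illustrative:

#include <cstdint>
#include <cstdio>

int main() {
    // Bit i of each constant is that input's value for combination i.
    constexpr std::uint8_t a = 0xF0; // bit 2 of i
    constexpr std::uint8_t b = 0xCC; // bit 1 of i
    constexpr std::uint8_t c = 0xAA; // bit 0 of i
    // Truth table for (a & b) | c, the same value LOP3.LUT would be given.
    const std::uint8_t ttbl = static_cast<std::uint8_t>((a & b) | c);
    std::printf("LUT for (a & b) | c = 0x%02X\n", ttbl); // prints 0xEA
}
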
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15} // Anonymous namespace
16
17void TranslatorVisitor::P2R_reg(u64) {
18 throw NotImplementedException("P2R (reg)");
19}
20
21void TranslatorVisitor::P2R_cbuf(u64) {
22 throw NotImplementedException("P2R (cbuf)");
23}
24
25void TranslatorVisitor::P2R_imm(u64 insn) {
26 union {
27 u64 raw;
28 BitField<0, 8, IR::Reg> dest_reg;
29 BitField<8, 8, IR::Reg> src;
30 BitField<40, 1, Mode> mode;
31 BitField<41, 2, u64> byte_selector;
32 } const p2r{insn};
33
34 const u32 mask{GetImm20(insn).U32()};
35 const bool pr_mode{p2r.mode == Mode::PR};
36 const u32 num_items{pr_mode ? 7U : 4U};
37 const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
38 IR::U32 insert{ir.Imm32(0)};
39 for (u32 index = 0; index < num_items; ++index) {
40 if (((mask >> index) & 1) == 0) {
41 continue;
42 }
43 const IR::U1 cond{[this, index, pr_mode] {
44 if (pr_mode) {
45 return ir.GetPred(IR::Pred{index});
46 }
47 switch (index) {
48 case 0:
49 return ir.GetZFlag();
50 case 1:
51 return ir.GetSFlag();
52 case 2:
53 return ir.GetCFlag();
54 case 3:
55 return ir.GetOFlag();
56 }
57 throw LogicError("Unreachable P2R index");
58 }()};
59 const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
60 insert = ir.BitwiseOr(insert, bit);
61 }
62 const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
63 X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
64}
65
66} // namespace Shader::Maxwell
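
A plain-integer view of the P2R_imm merge above: each selected predicate (or condition-code) bit is placed at index + byte_selector * 8, and the result is combined with the source register under the inverted, shifted mask. A short sketch with illustrative values only:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t mask = 0x0F;              // immediate operand
    const std::uint32_t offset = 1 * 8;           // byte_selector = 1
    const bool preds[4]{true, false, true, true}; // P0..P3 (illustrative values)

    std::uint32_t insert = 0;
    for (std::uint32_t index = 0; index < 4; ++index) {
        if (((mask >> index) & 1) != 0 && preds[index]) {
            insert |= 1u << (index + offset);
        }
    }
    const std::uint32_t src = 0xFFFFFFFF;
    const std::uint32_t result = (src & ~(mask << offset)) | insert;
    std::printf("result = 0x%08X\n", result); // prints 0xFFFFFDFF
}
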
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<39, 4, u64> mask;
18 BitField<12, 4, u64> mov32i_mask;
19 } const mov{insn};
20
21 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
22 throw NotImplementedException("Non-full move mask");
23 }
24 v.X(mov.dest_reg, src);
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::MOV_reg(u64 insn) {
29 MOV(*this, insn, GetReg20(insn));
30}
31
32void TranslatorVisitor::MOV_cbuf(u64 insn) {
33 MOV(*this, insn, GetCbuf(insn));
34}
35
36void TranslatorVisitor::MOV_imm(u64 insn) {
37 MOV(*this, insn, GetImm20(insn));
38}
39
40void TranslatorVisitor::MOV32I(u64 insn) {
41 MOV(*this, insn, GetImm32(insn), true);
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15
16void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
17 switch (index) {
18 case 0:
19 return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
20 case 1:
21 return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
22 case 2:
23 return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
24 case 3:
25 return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
26 default:
27 throw LogicError("Unreachable R2P index");
28 }
29}
30
31void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
32 union {
33 u64 raw;
34 BitField<8, 8, IR::Reg> src_reg;
35 BitField<40, 1, Mode> mode;
36 BitField<41, 2, u64> byte_selector;
37 } const r2p{insn};
38 const IR::U32 src{v.X(r2p.src_reg)};
39 const IR::U32 count{v.ir.Imm32(1)};
40 const bool pr_mode{r2p.mode == Mode::PR};
41 const u32 num_items{pr_mode ? 7U : 4U};
42 const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
43 for (u32 index = 0; index < num_items; ++index) {
44 const IR::U32 offset{v.ir.Imm32(offset_base + index)};
45 const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
46 const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
47 const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
48 const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
49 if (pr_mode) {
50 const IR::Pred pred{index};
51 v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
52 } else {
53 SetFlag(v.ir, inv_mask_bit, src_bit, index);
54 }
55 }
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::R2P_reg(u64 insn) {
60 R2P(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::R2P_cbuf(u64 insn) {
64 R2P(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::R2P_imm(u64 insn) {
68 R2P(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SpecialRegister : u64 {
12 SR_LANEID = 0,
13 SR_CLOCK = 1,
14 SR_VIRTCFG = 2,
15 SR_VIRTID = 3,
16 SR_PM0 = 4,
17 SR_PM1 = 5,
18 SR_PM2 = 6,
19 SR_PM3 = 7,
20 SR_PM4 = 8,
21 SR_PM5 = 9,
22 SR_PM6 = 10,
23 SR_PM7 = 11,
24 SR12 = 12,
25 SR13 = 13,
26 SR14 = 14,
27 SR_ORDERING_TICKET = 15,
28 SR_PRIM_TYPE = 16,
29 SR_INVOCATION_ID = 17,
30 SR_Y_DIRECTION = 18,
31 SR_THREAD_KILL = 19,
32 SM_SHADER_TYPE = 20,
33 SR_DIRECTCBEWRITEADDRESSLOW = 21,
34 SR_DIRECTCBEWRITEADDRESSHIGH = 22,
35 SR_DIRECTCBEWRITEENABLE = 23,
36 SR_MACHINE_ID_0 = 24,
37 SR_MACHINE_ID_1 = 25,
38 SR_MACHINE_ID_2 = 26,
39 SR_MACHINE_ID_3 = 27,
40 SR_AFFINITY = 28,
41 SR_INVOCATION_INFO = 29,
42 SR_WSCALEFACTOR_XY = 30,
43 SR_WSCALEFACTOR_Z = 31,
44 SR_TID = 32,
45 SR_TID_X = 33,
46 SR_TID_Y = 34,
47 SR_TID_Z = 35,
48 SR_CTA_PARAM = 36,
49 SR_CTAID_X = 37,
50 SR_CTAID_Y = 38,
51 SR_CTAID_Z = 39,
52 SR_NTID = 40,
53 SR_CirQueueIncrMinusOne = 41,
54 SR_NLATC = 42,
55 SR43 = 43,
56 SR_SM_SPA_VERSION = 44,
57 SR_MULTIPASSSHADERINFO = 45,
58 SR_LWINHI = 46,
59 SR_SWINHI = 47,
60 SR_SWINLO = 48,
61 SR_SWINSZ = 49,
62 SR_SMEMSZ = 50,
63 SR_SMEMBANKS = 51,
64 SR_LWINLO = 52,
65 SR_LWINSZ = 53,
66 SR_LMEMLOSZ = 54,
67 SR_LMEMHIOFF = 55,
68 SR_EQMASK = 56,
69 SR_LTMASK = 57,
70 SR_LEMASK = 58,
71 SR_GTMASK = 59,
72 SR_GEMASK = 60,
73 SR_REGALLOC = 61,
74 SR_BARRIERALLOC = 62,
75 SR63 = 63,
76 SR_GLOBALERRORSTATUS = 64,
77 SR65 = 65,
78 SR_WARPERRORSTATUS = 66,
79 SR_WARPERRORSTATUSCLEAR = 67,
80 SR68 = 68,
81 SR69 = 69,
82 SR70 = 70,
83 SR71 = 71,
84 SR_PM_HI0 = 72,
85 SR_PM_HI1 = 73,
86 SR_PM_HI2 = 74,
87 SR_PM_HI3 = 75,
88 SR_PM_HI4 = 76,
89 SR_PM_HI5 = 77,
90 SR_PM_HI6 = 78,
91 SR_PM_HI7 = 79,
92 SR_CLOCKLO = 80,
93 SR_CLOCKHI = 81,
94 SR_GLOBALTIMERLO = 82,
95 SR_GLOBALTIMERHI = 83,
96 SR84 = 84,
97 SR85 = 85,
98 SR86 = 86,
99 SR87 = 87,
100 SR88 = 88,
101 SR89 = 89,
102 SR90 = 90,
103 SR91 = 91,
104 SR92 = 92,
105 SR93 = 93,
106 SR94 = 94,
107 SR95 = 95,
108 SR_HWTASKID = 96,
109 SR_CIRCULARQUEUEENTRYINDEX = 97,
110 SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
111 SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
112};
113
114[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
115 switch (special_register) {
116 case SpecialRegister::SR_INVOCATION_ID:
117 return ir.InvocationId();
118 case SpecialRegister::SR_THREAD_KILL:
119 return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
120 case SpecialRegister::SR_INVOCATION_INFO:
121 LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
122 return ir.Imm32(0x00ff'0000);
123 case SpecialRegister::SR_TID: {
124 const IR::Value tid{ir.LocalInvocationId()};
125 return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
126 IR::U32{ir.CompositeExtract(tid, 1)},
127 ir.Imm32(16), ir.Imm32(8)),
128 IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
129 }
130 case SpecialRegister::SR_TID_X:
131 return ir.LocalInvocationIdX();
132 case SpecialRegister::SR_TID_Y:
133 return ir.LocalInvocationIdY();
134 case SpecialRegister::SR_TID_Z:
135 return ir.LocalInvocationIdZ();
136 case SpecialRegister::SR_CTAID_X:
137 return ir.WorkgroupIdX();
138 case SpecialRegister::SR_CTAID_Y:
139 return ir.WorkgroupIdY();
140 case SpecialRegister::SR_CTAID_Z:
141 return ir.WorkgroupIdZ();
142 case SpecialRegister::SR_WSCALEFACTOR_XY:
143 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
144 return ir.Imm32(Common::BitCast<u32>(1.0f));
145 case SpecialRegister::SR_WSCALEFACTOR_Z:
146 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
147 return ir.Imm32(Common::BitCast<u32>(1.0f));
148 case SpecialRegister::SR_LANEID:
149 return ir.LaneId();
150 case SpecialRegister::SR_EQMASK:
151 return ir.SubgroupEqMask();
152 case SpecialRegister::SR_LTMASK:
153 return ir.SubgroupLtMask();
154 case SpecialRegister::SR_LEMASK:
155 return ir.SubgroupLeMask();
156 case SpecialRegister::SR_GTMASK:
157 return ir.SubgroupGtMask();
158 case SpecialRegister::SR_GEMASK:
159 return ir.SubgroupGeMask();
160 case SpecialRegister::SR_Y_DIRECTION:
161 return ir.BitCast<IR::U32>(ir.YDirection());
162 case SpecialRegister::SR_AFFINITY:
163 LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
164 return ir.Imm32(0); // This is the default value hardware returns.
165 default:
166 throw NotImplementedException("S2R special register {}", special_register);
167 }
168}
169} // Anonymous namespace
170
171void TranslatorVisitor::S2R(u64 insn) {
172 union {
173 u64 raw;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<20, 8, SpecialRegister> src_reg;
176 } const s2r{insn};
177
178 X(s2r.dest_reg, Read(ir, s2r.src_reg));
179}
180
181} // namespace Shader::Maxwell
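
A plain-integer sketch of the SR_TID packing in Read() above: the x component occupies the low bits, y is inserted at bit 16 (8 bits wide) and z at bit 26 (6 bits wide). InsertBits is a hypothetical helper mirroring BitFieldInsert, and the thread-id values are illustrative:

#include <cstdint>
#include <cstdio>

std::uint32_t InsertBits(std::uint32_t base, std::uint32_t value, unsigned pos, unsigned count) {
    const std::uint32_t mask = ((1u << count) - 1u) << pos;
    return (base & ~mask) | ((value << pos) & mask);
}

int main() {
    const std::uint32_t x = 5, y = 3, z = 1;
    const std::uint32_t tid = InsertBits(InsertBits(x, y, 16, 8), z, 26, 6);
    std::printf("SR_TID = 0x%08X\n", tid); // prints 0x04030005
}
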
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
13 throw NotImplementedException("Instruction {} is not implemented", opcode);
14}
15
16void TranslatorVisitor::ATOM_cas(u64) {
17 ThrowNotImplemented(Opcode::ATOM_cas);
18}
19
20void TranslatorVisitor::ATOMS_cas(u64) {
21 ThrowNotImplemented(Opcode::ATOMS_cas);
22}
23
24void TranslatorVisitor::B2R(u64) {
25 ThrowNotImplemented(Opcode::B2R);
26}
27
28void TranslatorVisitor::BPT(u64) {
29 ThrowNotImplemented(Opcode::BPT);
30}
31
32void TranslatorVisitor::BRA(u64) {
33 ThrowNotImplemented(Opcode::BRA);
34}
35
36void TranslatorVisitor::BRK(u64) {
37 ThrowNotImplemented(Opcode::BRK);
38}
39
40void TranslatorVisitor::CAL() {
41 // CAL is a no-op
42}
43
44void TranslatorVisitor::CCTL(u64) {
45 ThrowNotImplemented(Opcode::CCTL);
46}
47
48void TranslatorVisitor::CCTLL(u64) {
49 ThrowNotImplemented(Opcode::CCTLL);
50}
51
52void TranslatorVisitor::CONT(u64) {
53 ThrowNotImplemented(Opcode::CONT);
54}
55
56void TranslatorVisitor::CS2R(u64) {
57 ThrowNotImplemented(Opcode::CS2R);
58}
59
60void TranslatorVisitor::FCHK_reg(u64) {
61 ThrowNotImplemented(Opcode::FCHK_reg);
62}
63
64void TranslatorVisitor::FCHK_cbuf(u64) {
65 ThrowNotImplemented(Opcode::FCHK_cbuf);
66}
67
68void TranslatorVisitor::FCHK_imm(u64) {
69 ThrowNotImplemented(Opcode::FCHK_imm);
70}
71
72void TranslatorVisitor::GETCRSPTR(u64) {
73 ThrowNotImplemented(Opcode::GETCRSPTR);
74}
75
76void TranslatorVisitor::GETLMEMBASE(u64) {
77 ThrowNotImplemented(Opcode::GETLMEMBASE);
78}
79
80void TranslatorVisitor::IDE(u64) {
81 ThrowNotImplemented(Opcode::IDE);
82}
83
84void TranslatorVisitor::IDP_reg(u64) {
85 ThrowNotImplemented(Opcode::IDP_reg);
86}
87
88void TranslatorVisitor::IDP_imm(u64) {
89 ThrowNotImplemented(Opcode::IDP_imm);
90}
91
92void TranslatorVisitor::IMAD_reg(u64) {
93 ThrowNotImplemented(Opcode::IMAD_reg);
94}
95
96void TranslatorVisitor::IMAD_rc(u64) {
97 ThrowNotImplemented(Opcode::IMAD_rc);
98}
99
100void TranslatorVisitor::IMAD_cr(u64) {
101 ThrowNotImplemented(Opcode::IMAD_cr);
102}
103
104void TranslatorVisitor::IMAD_imm(u64) {
105 ThrowNotImplemented(Opcode::IMAD_imm);
106}
107
108void TranslatorVisitor::IMAD32I(u64) {
109 ThrowNotImplemented(Opcode::IMAD32I);
110}
111
112void TranslatorVisitor::IMADSP_reg(u64) {
113 ThrowNotImplemented(Opcode::IMADSP_reg);
114}
115
116void TranslatorVisitor::IMADSP_rc(u64) {
117 ThrowNotImplemented(Opcode::IMADSP_rc);
118}
119
120void TranslatorVisitor::IMADSP_cr(u64) {
121 ThrowNotImplemented(Opcode::IMADSP_cr);
122}
123
124void TranslatorVisitor::IMADSP_imm(u64) {
125 ThrowNotImplemented(Opcode::IMADSP_imm);
126}
127
128void TranslatorVisitor::IMUL_reg(u64) {
129 ThrowNotImplemented(Opcode::IMUL_reg);
130}
131
132void TranslatorVisitor::IMUL_cbuf(u64) {
133 ThrowNotImplemented(Opcode::IMUL_cbuf);
134}
135
136void TranslatorVisitor::IMUL_imm(u64) {
137 ThrowNotImplemented(Opcode::IMUL_imm);
138}
139
140void TranslatorVisitor::IMUL32I(u64) {
141 ThrowNotImplemented(Opcode::IMUL32I);
142}
143
144void TranslatorVisitor::JCAL(u64) {
145 ThrowNotImplemented(Opcode::JCAL);
146}
147
148void TranslatorVisitor::JMP(u64) {
149 ThrowNotImplemented(Opcode::JMP);
150}
151
152void TranslatorVisitor::KIL() {
153 // KIL is a no-op
154}
155
156void TranslatorVisitor::LD(u64) {
157 ThrowNotImplemented(Opcode::LD);
158}
159
160void TranslatorVisitor::LEPC(u64) {
161 ThrowNotImplemented(Opcode::LEPC);
162}
163
164void TranslatorVisitor::LONGJMP(u64) {
165 ThrowNotImplemented(Opcode::LONGJMP);
166}
167
168void TranslatorVisitor::NOP(u64) {
169    // NOP is a no-op
170}
171
172void TranslatorVisitor::PBK() {
173 // PBK is a no-op
174}
175
176void TranslatorVisitor::PCNT() {
177 // PCNT is a no-op
178}
179
180void TranslatorVisitor::PEXIT(u64) {
181 ThrowNotImplemented(Opcode::PEXIT);
182}
183
184void TranslatorVisitor::PLONGJMP(u64) {
185 ThrowNotImplemented(Opcode::PLONGJMP);
186}
187
188void TranslatorVisitor::PRET(u64) {
189 ThrowNotImplemented(Opcode::PRET);
190}
191
192void TranslatorVisitor::PRMT_reg(u64) {
193 ThrowNotImplemented(Opcode::PRMT_reg);
194}
195
196void TranslatorVisitor::PRMT_rc(u64) {
197 ThrowNotImplemented(Opcode::PRMT_rc);
198}
199
200void TranslatorVisitor::PRMT_cr(u64) {
201 ThrowNotImplemented(Opcode::PRMT_cr);
202}
203
204void TranslatorVisitor::PRMT_imm(u64) {
205 ThrowNotImplemented(Opcode::PRMT_imm);
206}
207
208void TranslatorVisitor::R2B(u64) {
209 ThrowNotImplemented(Opcode::R2B);
210}
211
212void TranslatorVisitor::RAM(u64) {
213 ThrowNotImplemented(Opcode::RAM);
214}
215
216void TranslatorVisitor::RET(u64) {
217 ThrowNotImplemented(Opcode::RET);
218}
219
220void TranslatorVisitor::RTT(u64) {
221 ThrowNotImplemented(Opcode::RTT);
222}
223
224void TranslatorVisitor::SAM(u64) {
225 ThrowNotImplemented(Opcode::SAM);
226}
227
228void TranslatorVisitor::SETCRSPTR(u64) {
229 ThrowNotImplemented(Opcode::SETCRSPTR);
230}
231
232void TranslatorVisitor::SETLMEMBASE(u64) {
233 ThrowNotImplemented(Opcode::SETLMEMBASE);
234}
235
236void TranslatorVisitor::SSY() {
237 // SSY is a no-op
238}
239
240void TranslatorVisitor::ST(u64) {
241 ThrowNotImplemented(Opcode::ST);
242}
243
244void TranslatorVisitor::STP(u64) {
245 ThrowNotImplemented(Opcode::STP);
246}
247
248void TranslatorVisitor::SUATOM_cas(u64) {
249 ThrowNotImplemented(Opcode::SUATOM_cas);
250}
251
252void TranslatorVisitor::SYNC(u64) {
253 ThrowNotImplemented(Opcode::SYNC);
254}
255
256void TranslatorVisitor::TXA(u64) {
257 ThrowNotImplemented(Opcode::TXA);
258}
259
260void TranslatorVisitor::VABSDIFF(u64) {
261 ThrowNotImplemented(Opcode::VABSDIFF);
262}
263
264void TranslatorVisitor::VABSDIFF4(u64) {
265 ThrowNotImplemented(Opcode::VABSDIFF4);
266}
267
268void TranslatorVisitor::VADD(u64) {
269 ThrowNotImplemented(Opcode::VADD);
270}
271
272void TranslatorVisitor::VSET(u64) {
273 ThrowNotImplemented(Opcode::VSET);
274}
275void TranslatorVisitor::VSHL(u64) {
276 ThrowNotImplemented(Opcode::VSHL);
277}
278
279void TranslatorVisitor::VSHR(u64) {
280 ThrowNotImplemented(Opcode::VSHR);
281}
282
283} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> output_reg; // Not needed on host
16 BitField<39, 1, u64> emit;
17 BitField<40, 1, u64> cut;
18 } const out{insn};
19
20 stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
21
22 if (out.emit != 0) {
23 v.ir.EmitVertex(stream_index);
24 }
25 if (out.cut != 0) {
26 v.ir.EndPrimitive(stream_index);
27 }
28 // Host doesn't need the output register, but we can write to it to avoid undefined reads
29 v.X(out.dest_reg, v.ir.Imm32(0));
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::OUT_reg(u64 insn) {
34 OUT(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::OUT_cbuf(u64 insn) {
38 OUT(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::OUT_imm(u64 insn) {
42 OUT(*this, insn, GetImm20(insn));
43}
44
45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 CovMask,
14 Covered,
15 Offset,
16 CentroidOffset,
17 MyIndex,
18};
19} // Anonymous namespace
20
21void TranslatorVisitor::PIXLD(u64 insn) {
22 union {
23 u64 raw;
24 BitField<31, 3, Mode> mode;
25 BitField<0, 8, IR::Reg> dest_reg;
26 BitField<8, 8, IR::Reg> addr_reg;
27 BitField<20, 8, s64> addr_offset;
28 BitField<45, 3, IR::Pred> dest_pred;
29 } const pixld{insn};
30
31 if (pixld.dest_pred != IR::Pred::PT) {
32 throw NotImplementedException("Destination predicate");
33 }
34 if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
35 throw NotImplementedException("Non-zero source register");
36 }
37 switch (pixld.mode) {
38 case Mode::MyIndex:
39 X(pixld.dest_reg, ir.SampleId());
40 break;
41 default:
42 throw NotImplementedException("Mode {}", pixld.mode.Value());
43 }
44}
45
46} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSETP(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 3, IR::Pred> dest_pred_b;
15 BitField<3, 3, IR::Pred> dest_pred_a;
16 BitField<12, 3, IR::Pred> pred_a;
17 BitField<15, 1, u64> neg_pred_a;
18 BitField<24, 2, BooleanOp> bop_1;
19 BitField<29, 3, IR::Pred> pred_b;
20 BitField<32, 1, u64> neg_pred_b;
21 BitField<39, 3, IR::Pred> pred_c;
22 BitField<42, 1, u64> neg_pred_c;
23 BitField<45, 2, BooleanOp> bop_2;
24 } const pset{insn};
25
26 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
27 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
28 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
29
30 const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
31 const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
32 const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
33 const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
34
35 ir.SetPred(pset.dest_pred_a, result_a);
36 ir.SetPred(pset.dest_pred_b, result_b);
37}
38} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSET(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<12, 3, IR::Pred> pred_a;
16 BitField<15, 1, u64> neg_pred_a;
17 BitField<24, 2, BooleanOp> bop_1;
18 BitField<29, 3, IR::Pred> pred_b;
19 BitField<32, 1, u64> neg_pred_b;
20 BitField<39, 3, IR::Pred> pred_c;
21 BitField<42, 1, u64> neg_pred_c;
22 BitField<44, 1, u64> bf;
23 BitField<45, 2, BooleanOp> bop_2;
24 BitField<47, 1, u64> cc;
25 } const pset{insn};
26
27 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
28 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
29 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
30
31 const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
32 const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
33
34 const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
35 const IR::U32 zero{ir.Imm32(0)};
36
37 const IR::U32 result{ir.Select(res_2, true_result, zero)};
38
39 X(pset.dest_reg, result);
40 if (pset.cc != 0) {
41 const IR::U1 is_zero{ir.IEqual(result, zero)};
42 SetZFlag(is_zero);
43 if (pset.bf != 0) {
44 ResetSFlag();
45 } else {
46 SetSFlag(ir.LogicalNot(is_zero));
47 }
48 ResetOFlag();
49 ResetCFlag();
50 }
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11
12void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 } const sel{insn};
20
21 const IR::U1 pred = v.ir.GetPred(sel.pred);
22 IR::U32 op_a{v.X(sel.src_reg)};
23 IR::U32 op_b{src};
24 if (sel.neg_pred != 0) {
25 std::swap(op_a, op_b);
26 }
27 const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
28
29 v.X(sel.dest_reg, result);
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::SEL_reg(u64 insn) {
34 SEL(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::SEL_cbuf(u64 insn) {
38 SEL(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::SEL_imm(u64 insn) {
42 SEL(*this, insn, GetImm20(insn));
43}
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24enum class Size : u64 {
25 U32,
26 S32,
27 U64,
28 S64,
29 F32FTZRN,
30 F16x2FTZRN,
31 SD32,
32 SD64,
33};
34
35enum class AtomicOp : u64 {
36 ADD,
37 MIN,
38 MAX,
39 INC,
40 DEC,
41 AND,
42 OR,
43 XOR,
44 EXCH,
45};
46
47enum class Clamp : u64 {
48 IGN,
49 Default,
50 TRAP,
51};
52
53TextureType GetType(Type type) {
54 switch (type) {
55 case Type::_1D:
56 return TextureType::Color1D;
57 case Type::BUFFER_1D:
58 return TextureType::Buffer;
59 case Type::ARRAY_1D:
60 return TextureType::ColorArray1D;
61 case Type::_2D:
62 return TextureType::Color2D;
63 case Type::ARRAY_2D:
64 return TextureType::ColorArray2D;
65 case Type::_3D:
66 return TextureType::Color3D;
67 }
68 throw NotImplementedException("Invalid type {}", type);
69}
70
71IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
72 switch (type) {
73 case Type::_1D:
74 case Type::BUFFER_1D:
75 return v.X(reg);
76 case Type::_2D:
77 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
78 case Type::_3D:
79 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
80 default:
81 break;
82 }
83 throw NotImplementedException("Invalid type {}", type);
84}
85
86IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
87 const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
88 bool is_signed) {
89 switch (op) {
90 case AtomicOp::ADD:
91 return ir.ImageAtomicIAdd(handle, coords, op_b, info);
92 case AtomicOp::MIN:
93 return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
94 case AtomicOp::MAX:
95 return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
96 case AtomicOp::INC:
97 return ir.ImageAtomicInc(handle, coords, op_b, info);
98 case AtomicOp::DEC:
99 return ir.ImageAtomicDec(handle, coords, op_b, info);
100 case AtomicOp::AND:
101 return ir.ImageAtomicAnd(handle, coords, op_b, info);
102 case AtomicOp::OR:
103 return ir.ImageAtomicOr(handle, coords, op_b, info);
104 case AtomicOp::XOR:
105 return ir.ImageAtomicXor(handle, coords, op_b, info);
106 case AtomicOp::EXCH:
107 return ir.ImageAtomicExchange(handle, coords, op_b, info);
108 default:
109 throw NotImplementedException("Atomic Operation {}", op);
110 }
111}
112
113ImageFormat Format(Size size) {
114 switch (size) {
115 case Size::U32:
116 case Size::S32:
117 case Size::SD32:
118 return ImageFormat::R32_UINT;
119 default:
120 break;
121 }
122 throw NotImplementedException("Invalid size {}", size);
123}
124
125bool IsSizeInt32(Size size) {
126 switch (size) {
127 case Size::U32:
128 case Size::S32:
129 case Size::SD32:
130 return true;
131 default:
132 return false;
133 }
134}
135
136void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
137 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
138 u64 bound_offset, bool is_bindless, bool write_result) {
139 if (clamp != Clamp::IGN) {
140 throw NotImplementedException("Clamp {}", clamp);
141 }
142 if (!IsSizeInt32(size)) {
143 throw NotImplementedException("Size {}", size);
144 }
145 const bool is_signed{size == Size::S32};
146 const ImageFormat format{Format(size)};
147 const TextureType tex_type{GetType(type)};
148 const IR::Value coords{MakeCoords(v, coord_reg, type)};
149
150 const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg)
151 : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
152 IR::TextureInstInfo info{};
153 info.type.Assign(tex_type);
154 info.image_format.Assign(format);
155
156 // TODO: float/64-bit operand
157 const IR::Value op_b{v.X(operand_reg)};
158 const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
159
160 if (write_result) {
161 v.X(dest_reg, IR::U32{color});
162 }
163}
164} // Anonymous namespace
165
166void TranslatorVisitor::SUATOM(u64 insn) {
167 union {
168 u64 raw;
169 BitField<54, 1, u64> is_bindless;
170 BitField<29, 4, AtomicOp> op;
171 BitField<33, 3, Type> type;
172 BitField<51, 3, Size> size;
173 BitField<49, 2, Clamp> clamp;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<8, 8, IR::Reg> coord_reg;
176 BitField<20, 8, IR::Reg> operand_reg;
177 BitField<36, 13, u64> bound_offset; // !is_bindless
178 BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
179 } const suatom{insn};
180
181 ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
182 suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
183 suatom.is_bindless != 0, true);
184}
185
186void TranslatorVisitor::SURED(u64 insn) {
187 // TODO: confirm offsets
188 union {
189 u64 raw;
190 BitField<51, 1, u64> is_bound;
191 BitField<21, 3, AtomicOp> op;
192 BitField<33, 3, Type> type;
193 BitField<20, 3, Size> size;
194 BitField<49, 2, Clamp> clamp;
195 BitField<0, 8, IR::Reg> operand_reg;
196 BitField<8, 8, IR::Reg> coord_reg;
197 BitField<36, 13, u64> bound_offset; // is_bound
198 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
199 } const sured{insn};
200 ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
201 sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
202 sured.is_bound == 0, false);
203}
204
205} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24constexpr unsigned R = 1 << 0;
25constexpr unsigned G = 1 << 1;
26constexpr unsigned B = 1 << 2;
27constexpr unsigned A = 1 << 3;
28
29constexpr std::array MASK{
30 0U, //
31 R, //
32 G, //
33 R | G, //
34 B, //
35 R | B, //
36 G | B, //
37 R | G | B, //
38 A, //
39 R | A, //
40 G | A, //
41 R | G | A, //
42 B | A, //
43 R | B | A, //
44 G | B | A, //
45 R | G | B | A, //
46};
47
48enum class Size : u64 {
49 U8,
50 S8,
51 U16,
52 S16,
53 B32,
54 B64,
55 B128,
56};
57
58enum class Clamp : u64 {
59 IGN,
60 Default,
61 TRAP,
62};
63
64// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
65enum class LoadCache : u64 {
66 CA, // Cache at all levels, likely to be accessed again
67 CG, // Cache at global level (L2 and below, not L1)
68 CI, // ???
69 CV, // Don't cache and fetch again (volatile)
70};
71
72enum class StoreCache : u64 {
73 WB, // Cache write-back all coherent levels
74 CG, // Cache at global level (L2 and below, not L1)
75 CS, // Cache streaming, likely to be accessed once
76 WT, // Cache write-through (to system memory, volatile?)
77};
78
79ImageFormat Format(Size size) {
80 switch (size) {
81 case Size::U8:
82 return ImageFormat::R8_UINT;
83 case Size::S8:
84 return ImageFormat::R8_SINT;
85 case Size::U16:
86 return ImageFormat::R16_UINT;
87 case Size::S16:
88 return ImageFormat::R16_SINT;
89 case Size::B32:
90 return ImageFormat::R32_UINT;
91 case Size::B64:
92 return ImageFormat::R32G32_UINT;
93 case Size::B128:
94 return ImageFormat::R32G32B32A32_UINT;
95 }
96 throw NotImplementedException("Invalid size {}", size);
97}
98
99int SizeInRegs(Size size) {
100 switch (size) {
101 case Size::U8:
102 case Size::S8:
103 case Size::U16:
104 case Size::S16:
105 case Size::B32:
106 return 1;
107 case Size::B64:
108 return 2;
109 case Size::B128:
110 return 4;
111 }
112 throw NotImplementedException("Invalid size {}", size);
113}
114
115TextureType GetType(Type type) {
116 switch (type) {
117 case Type::_1D:
118 return TextureType::Color1D;
119 case Type::BUFFER_1D:
120 return TextureType::Buffer;
121 case Type::ARRAY_1D:
122 return TextureType::ColorArray1D;
123 case Type::_2D:
124 return TextureType::Color2D;
125 case Type::ARRAY_2D:
126 return TextureType::ColorArray2D;
127 case Type::_3D:
128 return TextureType::Color3D;
129 }
130 throw NotImplementedException("Invalid type {}", type);
131}
132
133IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
134 const auto array{[&](int index) {
135 return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
136 }};
137 switch (type) {
138 case Type::_1D:
139 case Type::BUFFER_1D:
140 return v.X(reg);
141 case Type::ARRAY_1D:
142 return v.ir.CompositeConstruct(v.X(reg), array(1));
143 case Type::_2D:
144 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
145 case Type::ARRAY_2D:
146 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
147 case Type::_3D:
148 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
149 }
150 throw NotImplementedException("Invalid type {}", type);
151}
152
153unsigned SwizzleMask(u64 swizzle) {
154 if (swizzle == 0 || swizzle >= MASK.size()) {
155 throw NotImplementedException("Invalid swizzle {}", swizzle);
156 }
157 return MASK[swizzle];
158}
159
160IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
161 std::array<IR::U32, 4> colors;
162 for (int i = 0; i < num_regs; ++i) {
163 colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
164 }
165 for (int i = num_regs; i < 4; ++i) {
166 colors[static_cast<size_t>(i)] = ir.Imm32(0);
167 }
168 return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
169}
170} // Anonymous namespace
171
172void TranslatorVisitor::SULD(u64 insn) {
173 union {
174 u64 raw;
175 BitField<51, 1, u64> is_bound;
176 BitField<52, 1, u64> d;
177 BitField<23, 1, u64> ba;
178 BitField<33, 3, Type> type;
179 BitField<24, 2, LoadCache> cache;
180 BitField<20, 3, Size> size; // .D
181 BitField<20, 4, u64> swizzle; // .P
182 BitField<49, 2, Clamp> clamp;
183 BitField<0, 8, IR::Reg> dest_reg;
184 BitField<8, 8, IR::Reg> coord_reg;
185 BitField<36, 13, u64> bound_offset; // is_bound
186 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
187 } const suld{insn};
188
189 if (suld.clamp != Clamp::IGN) {
190 throw NotImplementedException("Clamp {}", suld.clamp.Value());
191 }
192 if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
193 throw NotImplementedException("Cache {}", suld.cache.Value());
194 }
195 const bool is_typed{suld.d != 0};
196 if (is_typed && suld.ba != 0) {
197 throw NotImplementedException("BA");
198 }
199
200 const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
201 const TextureType type{GetType(suld.type)};
202 const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
203 const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
204 : X(suld.bindless_reg)};
205 IR::TextureInstInfo info{};
206 info.type.Assign(type);
207 info.image_format.Assign(format);
208
209 const IR::Value result{ir.ImageRead(handle, coords, info)};
210 IR::Reg dest_reg{suld.dest_reg};
211 if (is_typed) {
212 const int num_regs{SizeInRegs(suld.size)};
213 for (int i = 0; i < num_regs; ++i) {
214 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
215 }
216 } else {
217 const unsigned mask{SwizzleMask(suld.swizzle)};
218 const int bits{std::popcount(mask)};
219 if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
220 throw NotImplementedException("Unaligned destination register");
221 }
222 for (unsigned component = 0; component < 4; ++component) {
223 if (((mask >> component) & 1) == 0) {
224 continue;
225 }
226 X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
227 ++dest_reg;
228 }
229 }
230}
231
232void TranslatorVisitor::SUST(u64 insn) {
233 union {
234 u64 raw;
235 BitField<51, 1, u64> is_bound;
236 BitField<52, 1, u64> d;
237 BitField<23, 1, u64> ba;
238 BitField<33, 3, Type> type;
239 BitField<24, 2, StoreCache> cache;
240 BitField<20, 3, Size> size; // .D
241 BitField<20, 4, u64> swizzle; // .P
242 BitField<49, 2, Clamp> clamp;
243 BitField<0, 8, IR::Reg> data_reg;
244 BitField<8, 8, IR::Reg> coord_reg;
245 BitField<36, 13, u64> bound_offset; // is_bound
246 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
247 } const sust{insn};
248
249 if (sust.clamp != Clamp::IGN) {
250 throw NotImplementedException("Clamp {}", sust.clamp.Value());
251 }
252 if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
253 throw NotImplementedException("Cache {}", sust.cache.Value());
254 }
255 const bool is_typed{sust.d != 0};
256 if (is_typed && sust.ba != 0) {
257 throw NotImplementedException("BA");
258 }
259 const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
260 const TextureType type{GetType(sust.type)};
261 const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
262 const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
263 : X(sust.bindless_reg)};
264 IR::TextureInstInfo info{};
265 info.type.Assign(type);
266 info.image_format.Assign(format);
267
268 IR::Value color;
269 if (is_typed) {
270 color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
271 } else {
272 const unsigned mask{SwizzleMask(sust.swizzle)};
273 if (mask != 0xf) {
274 throw NotImplementedException("Non-full mask");
275 }
276 color = MakeColor(ir, sust.data_reg, 4);
277 }
278 ir.ImageWrite(handle, coords, color, info);
279}
280
281} // namespace Shader::Maxwell
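
To make the SULD.P path above concrete: the 4-bit swizzle selects a component mask from MASK, and each selected component lands in the next consecutive destination register. A small sketch using the same table; the swizzle value is illustrative:

#include <array>
#include <bit>
#include <cstdio>

int main() {
    constexpr unsigned R = 1 << 0, G = 1 << 1, B = 1 << 2, A = 1 << 3;
    constexpr std::array MASK{0u, R, G, R | G, B, R | B, G | B, R | G | B,
                              A, R | A, G | A, R | G | A, B | A, R | B | A, G | B | A, R | G | B | A};
    const unsigned swizzle = 0b0101;     // .P selector encoded in the instruction
    const unsigned mask = MASK[swizzle]; // R | B
    std::printf("%d registers written:", std::popcount(mask));
    for (unsigned component = 0, reg = 0; component < 4; ++component) {
        if ((mask >> component) & 1) {
            std::printf(" Rd+%u <- component %u", reg++, component);
        }
    }
    std::printf("\n");
}
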
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Blod : u64 {
15 None,
16 LZ,
17 LB,
18 LL,
19 INVALIDBLOD4,
20 INVALIDBLOD5,
21 LBA,
22 LLA,
23};
24
25enum class TextureType : u64 {
26 _1D,
27 ARRAY_1D,
28 _2D,
29 ARRAY_2D,
30 _3D,
31 ARRAY_3D,
32 CUBE,
33 ARRAY_CUBE,
34};
35
36Shader::TextureType GetType(TextureType type) {
37 switch (type) {
38 case TextureType::_1D:
39 return Shader::TextureType::Color1D;
40 case TextureType::ARRAY_1D:
41 return Shader::TextureType::ColorArray1D;
42 case TextureType::_2D:
43 return Shader::TextureType::Color2D;
44 case TextureType::ARRAY_2D:
45 return Shader::TextureType::ColorArray2D;
46 case TextureType::_3D:
47 return Shader::TextureType::Color3D;
48 case TextureType::ARRAY_3D:
49 throw NotImplementedException("3D array texture type");
50 case TextureType::CUBE:
51 return Shader::TextureType::ColorCube;
52 case TextureType::ARRAY_CUBE:
53 return Shader::TextureType::ColorArrayCube;
54 }
55 throw NotImplementedException("Invalid texture type {}", type);
56}
57
58IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
59 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
60 switch (type) {
61 case TextureType::_1D:
62 return v.F(reg);
63 case TextureType::ARRAY_1D:
64 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
65 case TextureType::_2D:
66 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
67 case TextureType::ARRAY_2D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
69 case TextureType::_3D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
71 case TextureType::ARRAY_3D:
72 throw NotImplementedException("3D array texture type");
73 case TextureType::CUBE:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_CUBE:
76 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
77 }
78 throw NotImplementedException("Invalid texture type {}", type);
79}
80
81IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
82 switch (blod) {
83 case Blod::None:
84 return v.ir.Imm32(0.0f);
85 case Blod::LZ:
86 return v.ir.Imm32(0.0f);
87 case Blod::LB:
88 case Blod::LL:
89 case Blod::LBA:
90 case Blod::LLA:
91 return v.F(reg++);
92 case Blod::INVALIDBLOD4:
93 case Blod::INVALIDBLOD5:
94 break;
95 }
96 throw NotImplementedException("Invalid blod {}", blod);
97}
98
99IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
100 const IR::U32 value{v.X(reg++)};
101 switch (type) {
102 case TextureType::_1D:
103 case TextureType::ARRAY_1D:
104 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
105 case TextureType::_2D:
106 case TextureType::ARRAY_2D:
107 return v.ir.CompositeConstruct(
108 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
109 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
110 case TextureType::_3D:
111 case TextureType::ARRAY_3D:
112 return v.ir.CompositeConstruct(
113 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
114 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
116 case TextureType::CUBE:
117 case TextureType::ARRAY_CUBE:
118 throw NotImplementedException("Illegal offset on CUBE sample");
119 }
120 throw NotImplementedException("Invalid texture type {}", type);
121}
122
123bool HasExplicitLod(Blod blod) {
124 switch (blod) {
125 case Blod::LL:
126 case Blod::LLA:
127 case Blod::LZ:
128 return true;
129 default:
130 return false;
131 }
132}
133
134void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
135 std::optional<u32> cbuf_offset) {
136 union {
137 u64 raw;
138 BitField<35, 1, u64> ndv;
139 BitField<49, 1, u64> nodep;
140 BitField<50, 1, u64> dc;
141 BitField<51, 3, IR::Pred> sparse_pred;
142 BitField<0, 8, IR::Reg> dest_reg;
143 BitField<8, 8, IR::Reg> coord_reg;
144 BitField<20, 8, IR::Reg> meta_reg;
145 BitField<28, 3, TextureType> type;
146 BitField<31, 4, u64> mask;
147 } const tex{insn};
148
149 if (lc) {
150 throw NotImplementedException("LC");
151 }
152 const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
153
154 IR::Reg meta_reg{tex.meta_reg};
155 IR::Value handle;
156 IR::Value offset;
157 IR::F32 dref;
158 IR::F32 lod_clamp;
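// Extra operands are read from consecutive registers starting at meta_reg: the bindless handle (if any), the LOD or bias, the AOFFI offset, and finally the depth-compare reference.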
159 if (cbuf_offset) {
160 handle = v.ir.Imm32(*cbuf_offset);
161 } else {
162 handle = v.X(meta_reg++);
163 }
164 const IR::F32 lod{MakeLod(v, meta_reg, blod)};
165 if (aoffi) {
166 offset = MakeOffset(v, meta_reg, tex.type);
167 }
168 if (tex.dc != 0) {
169 dref = v.F(meta_reg++);
170 }
171 IR::TextureInstInfo info{};
172 info.type.Assign(GetType(tex.type));
173 info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
174 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
175 info.has_lod_clamp.Assign(lc ? 1 : 0);
176
177 const IR::Value sample{[&]() -> IR::Value {
178 if (tex.dc == 0) {
179 if (HasExplicitLod(blod)) {
180 return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
181 } else {
182 return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
183 }
184 }
185 if (HasExplicitLod(blod)) {
186 return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
187 } else {
188 return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
189 info);
190 }
191 }()};
192
193 IR::Reg dest_reg{tex.dest_reg};
194 for (int element = 0; element < 4; ++element) {
195 if (((tex.mask >> element) & 1) == 0) {
196 continue;
197 }
198 IR::F32 value;
199 if (tex.dc != 0) {
200 value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
201 } else {
202 value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
203 }
204 v.F(dest_reg, value);
205 ++dest_reg;
206 }
207 if (tex.sparse_pred != IR::Pred::PT) {
208 v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
209 }
210}
211} // Anonymous namespace
212
213void TranslatorVisitor::TEX(u64 insn) {
214 union {
215 u64 raw;
216 BitField<54, 1, u64> aoffi;
217 BitField<55, 3, Blod> blod;
218 BitField<58, 1, u64> lc;
219 BitField<36, 13, u64> cbuf_offset;
220 } const tex{insn};
221
222 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
223}
224
225void TranslatorVisitor::TEX_b(u64 insn) {
226 union {
227 u64 raw;
228 BitField<36, 1, u64> aoffi;
229 BitField<37, 3, Blod> blod;
230 BitField<40, 1, u64> lc;
231 } const tex{insn};
232
233 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
234}
235
236} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19union Encoding {
20 u64 raw;
21 BitField<59, 1, Precision> precision;
22 BitField<53, 4, u64> encoding;
23 BitField<49, 1, u64> nodep;
24 BitField<28, 8, IR::Reg> dest_reg_b;
25 BitField<0, 8, IR::Reg> dest_reg_a;
26 BitField<8, 8, IR::Reg> src_reg_a;
27 BitField<20, 8, IR::Reg> src_reg_b;
28 BitField<36, 13, u64> cbuf_offset;
29 BitField<50, 3, u64> swizzle;
30};
31
32constexpr unsigned R = 1;
33constexpr unsigned G = 2;
34constexpr unsigned B = 4;
35constexpr unsigned A = 8;
36
37constexpr std::array RG_LUT{
38 R, //
39 G, //
40 B, //
41 A, //
42 R | G, //
43 R | A, //
44 G | A, //
45 B | A, //
46};
47
48constexpr std::array RGBA_LUT{
49 R | G | B, //
50 R | G | A, //
51 R | B | A, //
52 G | B | A, //
53 R | G | B | A, //
54};
55
56void CheckAlignment(IR::Reg reg, size_t alignment) {
57 if (!IR::IsAligned(reg, alignment)) {
58 throw NotImplementedException("Unaligned source register {}", reg);
59 }
60}
61
62template <typename... Args>
63IR::Value Composite(TranslatorVisitor& v, Args... regs) {
64 return v.ir.CompositeConstruct(v.F(regs)...);
65}
66
67IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
68 return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding texs{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
74 const IR::F32 zero{v.ir.Imm32(0.0f)};
75 const IR::Reg reg_a{texs.src_reg_a};
76 const IR::Reg reg_b{texs.src_reg_b};
77 IR::TextureInstInfo info{};
78 if (texs.precision == Precision::F16) {
79 info.relaxed_precision.Assign(1);
80 }
81 switch (texs.encoding) {
82 case 0: // 1D.LZ
83 info.type.Assign(TextureType::Color1D);
84 return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
85 case 1: // 2D
86 info.type.Assign(TextureType::Color2D);
87 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
88 case 2: // 2D.LZ
89 info.type.Assign(TextureType::Color2D);
90 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
91 case 3: // 2D.LL
92 CheckAlignment(reg_a, 2);
93 info.type.Assign(TextureType::Color2D);
94 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
95 info);
96 case 4: // 2D.DC
97 CheckAlignment(reg_a, 2);
98 info.type.Assign(TextureType::Color2D);
99 info.is_depth.Assign(1);
100 return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
101 {}, {}, {}, info);
102 case 5: // 2D.LL.DC
103 CheckAlignment(reg_a, 2);
104 CheckAlignment(reg_b, 2);
105 info.type.Assign(TextureType::Color2D);
106 info.is_depth.Assign(1);
107 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
108 v.F(reg_b + 1), v.F(reg_b), {}, info);
109 case 6: // 2D.LZ.DC
110 CheckAlignment(reg_a, 2);
111 info.type.Assign(TextureType::Color2D);
112 info.is_depth.Assign(1);
113 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
114 zero, {}, info);
115 case 7: // ARRAY_2D
116 CheckAlignment(reg_a, 2);
117 info.type.Assign(TextureType::ColorArray2D);
118 return v.ir.ImageSampleImplicitLod(
119 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
120 {}, {}, {}, info);
121 case 8: // ARRAY_2D.LZ
122 CheckAlignment(reg_a, 2);
123 info.type.Assign(TextureType::ColorArray2D);
124 return v.ir.ImageSampleExplicitLod(
125 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
126 zero, {}, info);
127 case 9: // ARRAY_2D.LZ.DC
128 CheckAlignment(reg_a, 2);
129 CheckAlignment(reg_b, 2);
130 info.type.Assign(TextureType::ColorArray2D);
131 info.is_depth.Assign(1);
132 return v.ir.ImageSampleDrefExplicitLod(
133 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
134 v.F(reg_b + 1), zero, {}, info);
135 case 10: // 3D
136 CheckAlignment(reg_a, 2);
137 info.type.Assign(TextureType::Color3D);
138 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
139 {}, info);
140 case 11: // 3D.LZ
141 CheckAlignment(reg_a, 2);
142 info.type.Assign(TextureType::Color3D);
143 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
144 info);
145 case 12: // CUBE
146 CheckAlignment(reg_a, 2);
147 info.type.Assign(TextureType::ColorCube);
148 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
149 {}, info);
150 case 13: // CUBE.LL
151 CheckAlignment(reg_a, 2);
152 CheckAlignment(reg_b, 2);
153 info.type.Assign(TextureType::ColorCube);
154 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
155 v.F(reg_b + 1), {}, info);
156 default:
157 throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
158 }
159}
160
161unsigned Swizzle(u64 insn) {
162 const Encoding texs{insn};
163 const size_t encoding{texs.swizzle};
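// When the second destination register is RZ, at most two components are written, so the two-component (RG) table applies; otherwise the RGBA table is used.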
164 if (texs.dest_reg_b == IR::Reg::RZ) {
165 if (encoding >= RG_LUT.size()) {
166 throw NotImplementedException("Illegal RG encoding {}", encoding);
167 }
168 return RG_LUT[encoding];
169 } else {
170 if (encoding >= RGBA_LUT.size()) {
171 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
172 }
173 return RGBA_LUT[encoding];
174 }
175}
176
177IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
178 const bool is_shadow{sample.Type() == IR::Type::F32};
179 if (is_shadow) {
180 const bool is_alpha{component == 3};
181 return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
182 } else {
183 return IR::F32{v.ir.CompositeExtract(sample, component)};
184 }
185}
186
187IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
188 const Encoding texs{insn};
189 switch (index) {
190 case 0:
191 return texs.dest_reg_a;
192 case 1:
193 CheckAlignment(texs.dest_reg_a, 2);
194 return texs.dest_reg_a + 1;
195 case 2:
196 return texs.dest_reg_b;
197 case 3:
198 CheckAlignment(texs.dest_reg_b, 2);
199 return texs.dest_reg_b + 1;
200 }
201 throw LogicError("Invalid store index {}", index);
202}
203
204void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
205 const unsigned swizzle{Swizzle(insn)};
206 unsigned store_index{0};
207 for (unsigned component = 0; component < 4; ++component) {
208 if (((swizzle >> component) & 1) == 0) {
209 continue;
210 }
211 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
212 v.F(dest, Extract(v, sample, component));
213 ++store_index;
214 }
215}
216
217IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
218 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
219}
220
221void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
222 const unsigned swizzle{Swizzle(insn)};
223 unsigned store_index{0};
224 std::array<IR::F32, 4> swizzled;
225 for (unsigned component = 0; component < 4; ++component) {
226 if (((swizzle >> component) & 1) == 0) {
227 continue;
228 }
229 swizzled[store_index] = Extract(v, sample, component);
230 ++store_index;
231 }
232 const IR::F32 zero{v.ir.Imm32(0.0f)};
233 const Encoding texs{insn};
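// F16 results are packed two components per 32-bit destination register; an odd component count leaves the upper half zero.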
234 switch (store_index) {
235 case 1:
236 v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
237 break;
238 case 2:
239 case 3:
240 case 4:
241 v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
242 switch (store_index) {
243 case 2:
244 break;
245 case 3:
246 v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
247 break;
248 case 4:
249 v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
250 break;
251 }
252 break;
253 }
254}
255} // Anonymous namespace
256
257void TranslatorVisitor::TEXS(u64 insn) {
258 const IR::Value sample{Sample(*this, insn)};
259 if (Encoding{insn}.precision == Precision::F32) {
260 Store32(*this, insn, sample);
261 } else {
262 Store16(*this, insn, sample);
263 }
264}
265
266} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26enum class OffsetType : u64 {
27 None = 0,
28 AOFFI,
29 PTP,
30 Invalid,
31};
32
33enum class ComponentType : u64 {
34 R = 0,
35 G = 1,
36 B = 2,
37 A = 3,
38};
39
40Shader::TextureType GetType(TextureType type) {
41 switch (type) {
42 case TextureType::_1D:
43 return Shader::TextureType::Color1D;
44 case TextureType::ARRAY_1D:
45 return Shader::TextureType::ColorArray1D;
46 case TextureType::_2D:
47 return Shader::TextureType::Color2D;
48 case TextureType::ARRAY_2D:
49 return Shader::TextureType::ColorArray2D;
50 case TextureType::_3D:
51 return Shader::TextureType::Color3D;
52 case TextureType::ARRAY_3D:
53 throw NotImplementedException("3D array texture type");
54 case TextureType::CUBE:
55 return Shader::TextureType::ColorCube;
56 case TextureType::ARRAY_CUBE:
57 return Shader::TextureType::ColorArrayCube;
58 }
59 throw NotImplementedException("Invalid texture type {}", type);
60}
61
62IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
63 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
64 switch (type) {
65 case TextureType::_1D:
66 return v.F(reg);
67 case TextureType::ARRAY_1D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
69 case TextureType::_2D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
71 case TextureType::ARRAY_2D:
72 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
73 case TextureType::_3D:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_3D:
76 throw NotImplementedException("3D array texture type");
77 case TextureType::CUBE:
78 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
79 case TextureType::ARRAY_CUBE:
80 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
81 }
82 throw NotImplementedException("Invalid texture type {}", type);
83}
84
85IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
86 const IR::U32 value{v.X(reg++)};
87 switch (type) {
88 case TextureType::_1D:
89 case TextureType::ARRAY_1D:
90 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
91 case TextureType::_2D:
92 case TextureType::ARRAY_2D:
93 return v.ir.CompositeConstruct(
94 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
95 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
96 case TextureType::_3D:
97 case TextureType::ARRAY_3D:
98 return v.ir.CompositeConstruct(
99 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
100 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
101 v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
102 case TextureType::CUBE:
103 case TextureType::ARRAY_CUBE:
104 throw NotImplementedException("Illegal offset on CUBE sample");
105 }
106 throw NotImplementedException("Invalid texture type {}", type);
107}
108
109std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
110 const IR::U32 value1{v.X(reg++)};
111 const IR::U32 value2{v.X(reg++)};
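// Each PTP register holds four signed 6-bit offsets, presumably one (x, y) pair per gathered texel across the two registers.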
112 const IR::U32 bitsize{v.ir.Imm32(6)};
113 const auto make_vector{[&v, &bitsize](const IR::U32& value) {
114 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
116 v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
117 v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
118 }};
119 return {make_vector(value1), make_vector(value2)};
120}
121
122void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
123 bool is_bindless) {
124 union {
125 u64 raw;
126 BitField<35, 1, u64> ndv;
127 BitField<49, 1, u64> nodep;
128 BitField<50, 1, u64> dc;
129 BitField<51, 3, IR::Pred> sparse_pred;
130 BitField<0, 8, IR::Reg> dest_reg;
131 BitField<8, 8, IR::Reg> coord_reg;
132 BitField<20, 8, IR::Reg> meta_reg;
133 BitField<28, 3, TextureType> type;
134 BitField<31, 4, u64> mask;
135 BitField<36, 13, u64> cbuf_offset;
136 } const tld4{insn};
137
138 const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
139
140 IR::Reg meta_reg{tld4.meta_reg};
141 IR::Value handle;
142 IR::Value offset;
143 IR::Value offset2;
144 IR::F32 dref;
145 if (!is_bindless) {
146 handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
147 } else {
148 handle = v.X(meta_reg++);
149 }
150 switch (offset_type) {
151 case OffsetType::None:
152 break;
153 case OffsetType::AOFFI:
154 offset = MakeOffset(v, meta_reg, tld4.type);
155 break;
156 case OffsetType::PTP:
157 std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
158 break;
159 default:
160 throw NotImplementedException("Invalid offset type {}", offset_type);
161 }
162 if (tld4.dc != 0) {
163 dref = v.F(meta_reg++);
164 }
165 IR::TextureInstInfo info{};
166 info.type.Assign(GetType(tld4.type));
167 info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
168 info.gather_component.Assign(static_cast<u32>(component_type));
169 const IR::Value sample{[&] {
170 if (tld4.dc == 0) {
171 return v.ir.ImageGather(handle, coords, offset, offset2, info);
172 }
173 return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
174 }()};
175
176 IR::Reg dest_reg{tld4.dest_reg};
177 for (size_t element = 0; element < 4; ++element) {
178 if (((tld4.mask >> element) & 1) == 0) {
179 continue;
180 }
181 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
182 ++dest_reg;
183 }
184 if (tld4.sparse_pred != IR::Pred::PT) {
185 v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
186 }
187}
188} // Anonymous namespace
189
190void TranslatorVisitor::TLD4(u64 insn) {
191 union {
192 u64 raw;
193 BitField<56, 2, ComponentType> component;
194 BitField<54, 2, OffsetType> offset;
195 } const tld4{insn};
196 Impl(*this, insn, tld4.component, tld4.offset, false);
197}
198
199void TranslatorVisitor::TLD4_b(u64 insn) {
200 union {
201 u64 raw;
202 BitField<38, 2, ComponentType> component;
203 BitField<36, 2, OffsetType> offset;
204 } const tld4{insn};
205 Impl(*this, insn, tld4.component, tld4.offset, true);
206}
207
208} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F32,
16 F16,
17};
18
19enum class ComponentType : u64 {
20 R = 0,
21 G = 1,
22 B = 2,
23 A = 3,
24};
25
26union Encoding {
27 u64 raw;
28 BitField<55, 1, Precision> precision;
29 BitField<52, 2, ComponentType> component_type;
30 BitField<51, 1, u64> aoffi;
31 BitField<50, 1, u64> dc;
32 BitField<49, 1, u64> nodep;
33 BitField<28, 8, IR::Reg> dest_reg_b;
34 BitField<0, 8, IR::Reg> dest_reg_a;
35 BitField<8, 8, IR::Reg> src_reg_a;
36 BitField<20, 8, IR::Reg> src_reg_b;
37 BitField<36, 13, u64> cbuf_offset;
38};
39
40void CheckAlignment(IR::Reg reg, size_t alignment) {
41 if (!IR::IsAligned(reg, alignment)) {
42 throw NotImplementedException("Unaligned source register {}", reg);
43 }
44}
45
46IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
47 const IR::U32 value{v.X(reg)};
48 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
49 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
50}
51
52IR::Value Sample(TranslatorVisitor& v, u64 insn) {
53 const Encoding tld4s{insn};
54 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
55 const IR::Reg reg_a{tld4s.src_reg_a};
56 const IR::Reg reg_b{tld4s.src_reg_b};
57 IR::TextureInstInfo info{};
58 if (tld4s.precision == Precision::F16) {
59 info.relaxed_precision.Assign(1);
60 }
61 info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
62 info.type.Assign(Shader::TextureType::Color2D);
63 info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
64 IR::Value coords;
65 if (tld4s.aoffi != 0) {
66 CheckAlignment(reg_a, 2);
67 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
68 IR::Value offset = MakeOffset(v, reg_b);
69 if (tld4s.dc != 0) {
70 CheckAlignment(reg_b, 2);
71 IR::F32 dref = v.F(reg_b + 1);
72 return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
73 }
74 return v.ir.ImageGather(handle, coords, offset, {}, info);
75 }
76 if (tld4s.dc != 0) {
77 CheckAlignment(reg_a, 2);
78 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
79 IR::F32 dref = v.F(reg_b);
80 return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
81 }
82 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
83 return v.ir.ImageGather(handle, coords, {}, {}, info);
84}
85
86IR::Reg RegStoreComponent32(u64 insn, size_t index) {
87 const Encoding tlds4{insn};
88 switch (index) {
89 case 0:
90 return tlds4.dest_reg_a;
91 case 1:
92 CheckAlignment(tlds4.dest_reg_a, 2);
93 return tlds4.dest_reg_a + 1;
94 case 2:
95 return tlds4.dest_reg_b;
96 case 3:
97 CheckAlignment(tlds4.dest_reg_b, 2);
98 return tlds4.dest_reg_b + 1;
99 }
100 throw LogicError("Invalid store index {}", index);
101}
102
103void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
104 for (size_t component = 0; component < 4; ++component) {
105 const IR::Reg dest{RegStoreComponent32(insn, component)};
106 v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
107 }
108}
109
110IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
111 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
112}
113
114void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
115 std::array<IR::F32, 4> swizzled;
116 for (size_t component = 0; component < 4; ++component) {
117 swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
118 }
119 const Encoding tld4s{insn};
120 v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
121 v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
122}
123} // Anonymous namespace
124
125void TranslatorVisitor::TLD4S(u64 insn) {
126 const IR::Value sample{Sample(*this, insn)};
127 if (Encoding{insn}.precision == Precision::F32) {
128 Store32(*this, insn, sample);
129 } else {
130 Store16(*this, insn, sample);
131 }
132}
133
134} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
49 const IR::U32 value{v.X(reg)};
50 const u32 base{has_lod_clamp ? 12U : 16U};
51 return v.ir.CompositeConstruct(
52 v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
53 v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
54}
55
56void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
57 union {
58 u64 raw;
59 BitField<49, 1, u64> nodep;
60 BitField<35, 1, u64> aoffi;
61 BitField<50, 1, u64> lc;
62 BitField<51, 3, IR::Pred> sparse_pred;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> coord_reg;
65 BitField<20, 8, IR::Reg> derivate_reg;
66 BitField<28, 3, TextureType> type;
67 BitField<31, 4, u64> mask;
68 BitField<36, 13, u64> cbuf_offset;
69 } const txd{insn};
70
71 const bool has_lod_clamp = txd.lc != 0;
72 if (has_lod_clamp) {
73 throw NotImplementedException("TXD.LC - CLAMP is not implemented");
74 }
75
76 IR::Value coords;
77 u32 num_derivates{};
78 IR::Reg base_reg{txd.coord_reg};
79 IR::Reg last_reg;
80 IR::Value handle;
81 if (is_bindless) {
82 handle = v.X(base_reg++);
83 } else {
84 handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
85 }
86
87 const auto read_array{[&]() -> IR::F32 {
88 const IR::U32 base{v.ir.Imm32(0)};
89 const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
90 const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
91 return v.ir.ConvertUToF(32, 16, array_index);
92 }};
93 switch (txd.type) {
94 case TextureType::_1D: {
95 coords = v.F(base_reg);
96 num_derivates = 1;
97 last_reg = base_reg + 1;
98 break;
99 }
100 case TextureType::ARRAY_1D: {
101 last_reg = base_reg + 1;
102 coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
103 num_derivates = 1;
104 break;
105 }
106 case TextureType::_2D: {
107 last_reg = base_reg + 2;
108 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
109 num_derivates = 2;
110 break;
111 }
112 case TextureType::ARRAY_2D: {
113 last_reg = base_reg + 2;
114 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
115 num_derivates = 2;
116 break;
117 }
118 default:
119 throw NotImplementedException("Invalid texture type");
120 }
121
122 const IR::Reg derivate_reg{txd.derivate_reg};
123 IR::Value derivates;
124 switch (num_derivates) {
125 case 1: {
126 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
127 break;
128 }
129 case 2: {
130 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
131 v.F(derivate_reg + 2), v.F(derivate_reg + 3));
132 break;
133 }
134 default:
135 throw NotImplementedException("Invalid texture type");
136 }
137
138 IR::Value offset;
139 if (txd.aoffi != 0) {
140 offset = MakeOffset(v, last_reg, has_lod_clamp);
141 }
142
143 IR::F32 lod_clamp;
144 if (has_lod_clamp) {
145 // The LOD clamp is a 4.8 fixed-point value that has to be converted to float.
146 // A fixed-point value is converted with float(value) / float(1 << fixed_point),
147 // where fixed_point is 8 in this case.
148 const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
149 const IR::F32 fixp_lc{v.ir.ConvertUToF(
150 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
151 lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
152 }
153
154 IR::TextureInstInfo info{};
155 info.type.Assign(GetType(txd.type));
156 info.num_derivates.Assign(num_derivates);
157 info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
158 const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
159
160 IR::Reg dest_reg{txd.dest_reg};
161 for (size_t element = 0; element < 4; ++element) {
162 if (((txd.mask >> element) & 1) == 0) {
163 continue;
164 }
165 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
166 ++dest_reg;
167 }
168 if (txd.sparse_pred != IR::Pred::PT) {
169 v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
170 }
171}
172} // Anonymous namespace
173
174void TranslatorVisitor::TXD(u64 insn) {
175 Impl(*this, insn, false);
176}
177
178void TranslatorVisitor::TXD_b(u64 insn) {
179 Impl(*this, insn, true);
180}
181
182} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 const auto read_array{
50 [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
51 switch (type) {
52 case TextureType::_1D:
53 return v.X(reg);
54 case TextureType::ARRAY_1D:
55 return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
73 const IR::U32 value{v.X(reg++)};
74 switch (type) {
75 case TextureType::_1D:
76 case TextureType::ARRAY_1D:
77 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
78 case TextureType::_2D:
79 case TextureType::ARRAY_2D:
80 return v.ir.CompositeConstruct(
81 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
82 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
83 case TextureType::_3D:
84 case TextureType::ARRAY_3D:
85 return v.ir.CompositeConstruct(
86 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
87 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
88 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
89 case TextureType::CUBE:
90 case TextureType::ARRAY_CUBE:
91 throw NotImplementedException("Illegal offset on CUBE sample");
92 }
93 throw NotImplementedException("Invalid texture type {}", type);
94}
95
96void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
97 union {
98 u64 raw;
99 BitField<49, 1, u64> nodep;
100 BitField<55, 1, u64> lod;
101 BitField<50, 1, u64> multisample;
102 BitField<35, 1, u64> aoffi;
103 BitField<54, 1, u64> clamp;
104 BitField<51, 3, IR::Pred> sparse_pred;
105 BitField<0, 8, IR::Reg> dest_reg;
106 BitField<8, 8, IR::Reg> coord_reg;
107 BitField<20, 8, IR::Reg> meta_reg;
108 BitField<28, 3, TextureType> type;
109 BitField<31, 4, u64> mask;
110 BitField<36, 13, u64> cbuf_offset;
111 } const tld{insn};
112
113 const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
114
115 IR::Reg meta_reg{tld.meta_reg};
116 IR::Value handle;
117 IR::Value offset;
118 IR::U32 lod;
119 IR::U32 multisample;
120 if (is_bindless) {
121 handle = v.X(meta_reg++);
122 } else {
123 handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
124 }
125 if (tld.lod != 0) {
126 lod = v.X(meta_reg++);
127 } else {
128 lod = v.ir.Imm32(0U);
129 }
130 if (tld.aoffi != 0) {
131 offset = MakeOffset(v, meta_reg, tld.type);
132 }
133 if (tld.multisample != 0) {
134 multisample = v.X(meta_reg++);
135 }
136 if (tld.clamp != 0) {
137 throw NotImplementedException("TLD.CL - CLAMP is not implemented");
138 }
139 IR::TextureInstInfo info{};
140 info.type.Assign(GetType(tld.type));
141 const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
142
143 IR::Reg dest_reg{tld.dest_reg};
144 for (size_t element = 0; element < 4; ++element) {
145 if (((tld.mask >> element) & 1) == 0) {
146 continue;
147 }
148 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
149 ++dest_reg;
150 }
151 if (tld.sparse_pred != IR::Pred::PT) {
152 v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
153 }
154}
155} // Anonymous namespace
156
157void TranslatorVisitor::TLD(u64 insn) {
158 Impl(*this, insn, false);
159}
160
161void TranslatorVisitor::TLD_b(u64 insn) {
162 Impl(*this, insn, true);
163}
164
165} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19constexpr unsigned R = 1;
20constexpr unsigned G = 2;
21constexpr unsigned B = 4;
22constexpr unsigned A = 8;
23
24constexpr std::array RG_LUT{
25 R, //
26 G, //
27 B, //
28 A, //
29 R | G, //
30 R | A, //
31 G | A, //
32 B | A, //
33};
34
35constexpr std::array RGBA_LUT{
36 R | G | B, //
37 R | G | A, //
38 R | B | A, //
39 G | B | A, //
40 R | G | B | A, //
41};
42
43union Encoding {
44 u64 raw;
45 BitField<59, 1, Precision> precision;
46 BitField<54, 1, u64> aoffi;
47 BitField<53, 1, u64> lod;
48 BitField<55, 1, u64> ms;
49 BitField<49, 1, u64> nodep;
50 BitField<28, 8, IR::Reg> dest_reg_b;
51 BitField<0, 8, IR::Reg> dest_reg_a;
52 BitField<8, 8, IR::Reg> src_reg_a;
53 BitField<20, 8, IR::Reg> src_reg_b;
54 BitField<36, 13, u64> cbuf_offset;
55 BitField<50, 3, u64> swizzle;
56 BitField<53, 4, u64> encoding;
57};
58
59void CheckAlignment(IR::Reg reg, size_t alignment) {
60 if (!IR::IsAligned(reg, alignment)) {
61 throw NotImplementedException("Unaligned source register {}", reg);
62 }
63}
64
65IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
66 const IR::U32 value{v.X(reg)};
67 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
68 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding tlds{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
74 const IR::Reg reg_a{tlds.src_reg_a};
75 const IR::Reg reg_b{tlds.src_reg_b};
76 IR::Value coords;
77 IR::U32 lod{v.ir.Imm32(0U)};
78 IR::Value offsets;
79 IR::U32 multisample;
80 Shader::TextureType texture_type{};
81 switch (tlds.encoding) {
82 case 0:
83 texture_type = Shader::TextureType::Color1D;
84 coords = v.X(reg_a);
85 break;
86 case 1:
87 texture_type = Shader::TextureType::Color1D;
88 coords = v.X(reg_a);
89 lod = v.X(reg_b);
90 break;
91 case 2:
92 texture_type = Shader::TextureType::Color2D;
93 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
94 break;
95 case 4:
96 CheckAlignment(reg_a, 2);
97 texture_type = Shader::TextureType::Color2D;
98 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
99 offsets = MakeOffset(v, reg_b);
100 break;
101 case 5:
102 CheckAlignment(reg_a, 2);
103 texture_type = Shader::TextureType::Color2D;
104 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
105 lod = v.X(reg_b);
106 break;
107 case 6:
108 CheckAlignment(reg_a, 2);
109 texture_type = Shader::TextureType::Color2D;
110 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
111 multisample = v.X(reg_b);
112 break;
113 case 7:
114 CheckAlignment(reg_a, 2);
115 texture_type = Shader::TextureType::Color3D;
116 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
117 break;
118 case 8: {
119 CheckAlignment(reg_b, 2);
120 const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
121 texture_type = Shader::TextureType::ColorArray2D;
122 coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
123 break;
124 }
125 case 12:
126 CheckAlignment(reg_a, 2);
127 CheckAlignment(reg_b, 2);
128 texture_type = Shader::TextureType::Color2D;
129 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
130 lod = v.X(reg_b);
131 offsets = MakeOffset(v, reg_b + 1);
132 break;
133 default:
134 throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
135 }
136 IR::TextureInstInfo info{};
137 if (tlds.precision == Precision::F16) {
138 info.relaxed_precision.Assign(1);
139 }
140 info.type.Assign(texture_type);
141 return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
142}
143
144unsigned Swizzle(u64 insn) {
145 const Encoding tlds{insn};
146 const size_t encoding{tlds.swizzle};
147 if (tlds.dest_reg_b == IR::Reg::RZ) {
148 if (encoding >= RG_LUT.size()) {
149 throw NotImplementedException("Illegal RG encoding {}", encoding);
150 }
151 return RG_LUT[encoding];
152 } else {
153 if (encoding >= RGBA_LUT.size()) {
154 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
155 }
156 return RGBA_LUT[encoding];
157 }
158}
159
160IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
161 return IR::F32{v.ir.CompositeExtract(sample, component)};
162}
163
164IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
165 const Encoding tlds{insn};
166 switch (index) {
167 case 0:
168 return tlds.dest_reg_a;
169 case 1:
170 CheckAlignment(tlds.dest_reg_a, 2);
171 return tlds.dest_reg_a + 1;
172 case 2:
173 return tlds.dest_reg_b;
174 case 3:
175 CheckAlignment(tlds.dest_reg_b, 2);
176 return tlds.dest_reg_b + 1;
177 }
178 throw LogicError("Invalid store index {}", index);
179}
180
181void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
182 const unsigned swizzle{Swizzle(insn)};
183 unsigned store_index{0};
184 for (unsigned component = 0; component < 4; ++component) {
185 if (((swizzle >> component) & 1) == 0) {
186 continue;
187 }
188 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
189 v.F(dest, Extract(v, sample, component));
190 ++store_index;
191 }
192}
193
194IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
195 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
196}
197
198void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
199 const unsigned swizzle{Swizzle(insn)};
200 unsigned store_index{0};
201 std::array<IR::F32, 4> swizzled;
202 for (unsigned component = 0; component < 4; ++component) {
203 if (((swizzle >> component) & 1) == 0) {
204 continue;
205 }
206 swizzled[store_index] = Extract(v, sample, component);
207 ++store_index;
208 }
209 const IR::F32 zero{v.ir.Imm32(0.0f)};
210 const Encoding tlds{insn};
211 switch (store_index) {
212 case 1:
213 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
214 break;
215 case 2:
216 case 3:
217 case 4:
218 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
219 switch (store_index) {
220 case 2:
221 break;
222 case 3:
223 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
224 break;
225 case 4:
226 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
227 break;
228 }
229 break;
230 }
231}
232} // Anonymous namespace
233
234void TranslatorVisitor::TLDS(u64 insn) {
235 const IR::Value sample{Sample(*this, insn)};
236 if (Encoding{insn}.precision == Precision::F32) {
237 Store32(*this, insn, sample);
238 } else {
239 Store16(*this, insn, sample);
240 }
241}
242} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 // The ISA reads an array component here, but it is not needed in high-level shading languages,
50 // so this information is dropped.
51 switch (type) {
52 case TextureType::_1D:
53 return v.F(reg);
54 case TextureType::ARRAY_1D:
55 return v.F(reg + 1);
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
73 union {
74 u64 raw;
75 BitField<49, 1, u64> nodep;
76 BitField<35, 1, u64> ndv;
77 BitField<0, 8, IR::Reg> dest_reg;
78 BitField<8, 8, IR::Reg> coord_reg;
79 BitField<20, 8, IR::Reg> meta_reg;
80 BitField<28, 3, TextureType> type;
81 BitField<31, 4, u64> mask;
82 BitField<36, 13, u64> cbuf_offset;
83 } const tmml{insn};
84
85 if ((tmml.mask & 0b1100) != 0) {
86 throw NotImplementedException("TMML BA results are not implemented");
87 }
88 const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
89
90 IR::U32 handle;
91 IR::Reg meta_reg{tmml.meta_reg};
92 if (is_bindless) {
93 handle = v.X(meta_reg++);
94 } else {
95 handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
96 }
97 IR::TextureInstInfo info{};
98 info.type.Assign(GetType(tmml.type));
99 const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
100
101 IR::Reg dest_reg{tmml.dest_reg};
102 for (size_t element = 0; element < 4; ++element) {
103 if (((tmml.mask >> element) & 1) == 0) {
104 continue;
105 }
106 IR::F32 value{v.ir.CompositeExtract(sample, element)};
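// The first two components hold the queried LOD; convert them to integer and shift left by 8, which is assumed to match the fixed-point layout the hardware writes.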
107 if (element < 2) {
108 IR::U32 casted_value;
109 if (element == 0) {
110 casted_value = v.ir.ConvertFToU(32, value);
111 } else {
112 casted_value = v.ir.ConvertFToS(16, value);
113 }
114 v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
115 } else {
116 v.F(dest_reg, value);
117 }
118 ++dest_reg;
119 }
120}
121} // Anonymous namespace
122
123void TranslatorVisitor::TMML(u64 insn) {
124 Impl(*this, insn, false);
125}
126
127void TranslatorVisitor::TMML_b(u64 insn) {
128 Impl(*this, insn, true);
129}
130
131} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Mode : u64 {
15 Dimension = 1,
16 TextureType = 2,
17 SamplePos = 5,
18};
19
20IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
21 switch (mode) {
22 case Mode::Dimension: {
23 const IR::U32 lod{v.X(src_reg)};
24 return v.ir.ImageQueryDimension(handle, lod);
25 }
26 case Mode::TextureType:
27 case Mode::SamplePos:
28 default:
29 throw NotImplementedException("Mode {}", mode);
30 }
31}
32
33void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
34 union {
35 u64 raw;
36 BitField<49, 1, u64> nodep;
37 BitField<0, 8, IR::Reg> dest_reg;
38 BitField<8, 8, IR::Reg> src_reg;
39 BitField<22, 3, Mode> mode;
40 BitField<31, 4, u64> mask;
41 } const txq{insn};
42
43 IR::Reg src_reg{txq.src_reg};
44 IR::U32 handle;
45 if (cbuf_offset) {
46 handle = v.ir.Imm32(*cbuf_offset);
47 } else {
48 handle = v.X(src_reg);
49 ++src_reg;
50 }
51 const IR::Value query{Query(v, handle, txq.mode, src_reg)};
52 IR::Reg dest_reg{txq.dest_reg};
53 for (int element = 0; element < 4; ++element) {
54 if (((txq.mask >> element) & 1) == 0) {
55 continue;
56 }
57 v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
58 ++dest_reg;
59 }
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::TXQ(u64 insn) {
64 union {
65 u64 raw;
66 BitField<36, 13, u64> cbuf_offset;
67 } const txq{insn};
68
69 Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
70}
71
72void TranslatorVisitor::TXQ_b(u64 insn) {
73 Impl(*this, insn, std::nullopt);
74}
75
76} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
7
8namespace Shader::Maxwell {
9
10IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
11 u32 selector, bool is_signed) {
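// The selector picks which byte or half-word of the source register holds the operand.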
12 switch (width) {
13 case VideoWidth::Byte:
14 case VideoWidth::Unknown:
15 return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
16 case VideoWidth::Short:
17 return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
18 case VideoWidth::Word:
19 return value;
20 default:
21 throw NotImplementedException("Unknown VideoWidth {}", width);
22 }
23}
24
25VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
26 // Immediates are always in 16-bit format.
27 return is_immediate ? VideoWidth::Short : width;
28}
29
30} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11enum class VideoWidth : u64 {
12 Byte,
13 Unknown,
14 Short,
15 Word,
16};
17
18[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
19 VideoWidth width, u32 selector, bool is_signed);
20
21[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class VideoMinMaxOps : u64 {
13 MRG_16H,
14 MRG_16L,
15 MRG_8B0,
16 MRG_8B2,
17 ACC,
18 MIN,
19 MAX,
20};
21
22[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
23 VideoMinMaxOps op, bool is_signed) {
24 switch (op) {
25 case VideoMinMaxOps::MIN:
26 return ir.IMin(lhs, rhs, is_signed);
27 case VideoMinMaxOps::MAX:
28 return ir.IMax(lhs, rhs, is_signed);
29 default:
30 throw NotImplementedException("VMNMX op {}", op);
31 }
32}
33} // Anonymous namespace
34
35void TranslatorVisitor::VMNMX(u64 insn) {
36 union {
37 u64 raw;
38 BitField<0, 8, IR::Reg> dest_reg;
39 BitField<20, 16, u64> src_b_imm;
40 BitField<28, 2, u64> src_b_selector;
41 BitField<29, 2, VideoWidth> src_b_width;
42 BitField<36, 2, u64> src_a_selector;
43 BitField<37, 2, VideoWidth> src_a_width;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> src_a_sign;
46 BitField<49, 1, u64> src_b_sign;
47 BitField<50, 1, u64> is_src_b_reg;
48 BitField<51, 3, VideoMinMaxOps> op;
49 BitField<54, 1, u64> dest_sign;
50 BitField<55, 1, u64> sat;
51 BitField<56, 1, u64> mx;
52 } const vmnmx{insn};
53
54 if (vmnmx.cc != 0) {
55 throw NotImplementedException("VMNMX CC");
56 }
57 if (vmnmx.sat != 0) {
58 throw NotImplementedException("VMNMX SAT");
59 }
60 // Selectors were shown to default to 2 in unit tests
61 if (vmnmx.src_a_selector != 2) {
62 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
63 }
64 if (vmnmx.src_b_selector != 2) {
65 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
66 }
67 if (vmnmx.src_a_width != VideoWidth::Word) {
68 throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
69 }
70
71 const bool is_b_imm{vmnmx.is_src_b_reg == 0};
72 const IR::U32 src_a{GetReg8(insn)};
73 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
74 const IR::U32 src_c{GetReg39(insn)};
75
76 const VideoWidth a_width{vmnmx.src_a_width};
77 const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
78
79 const bool src_a_signed{vmnmx.src_a_sign != 0};
80 const bool src_b_signed{vmnmx.src_b_sign != 0};
81 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
82 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
83
84 // The first operation's signedness depends only on operand b's sign
85 const bool op_1_signed{src_b_signed};
86
87 const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
88 : ir.IMin(op_a, op_b, op_1_signed)};
89 X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
90}
91
92} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::VMAD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<20, 16, u64> src_b_imm;
16 BitField<28, 2, u64> src_b_selector;
17 BitField<29, 2, VideoWidth> src_b_width;
18 BitField<36, 2, u64> src_a_selector;
19 BitField<37, 2, VideoWidth> src_a_width;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> src_a_sign;
22 BitField<49, 1, u64> src_b_sign;
23 BitField<50, 1, u64> is_src_b_reg;
24 BitField<51, 2, u64> scale;
25 BitField<53, 1, u64> src_c_neg;
26 BitField<54, 1, u64> src_a_neg;
27 BitField<55, 1, u64> sat;
28 } const vmad{insn};
29
30 if (vmad.cc != 0) {
31 throw NotImplementedException("VMAD CC");
32 }
33 if (vmad.sat != 0) {
34 throw NotImplementedException("VMAD SAT");
35 }
36 if (vmad.scale != 0) {
37 throw NotImplementedException("VMAD SCALE");
38 }
39 if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
40 throw NotImplementedException("VMAD PO");
41 }
42 if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
43 throw NotImplementedException("VMAD NEG");
44 }
45 const bool is_b_imm{vmad.is_src_b_reg == 0};
46 const IR::U32 src_a{GetReg8(insn)};
47 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
48 const IR::U32 src_c{GetReg39(insn)};
49
50 const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
51 // Immediate values can't have a selector
52 const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
53 const VideoWidth a_width{vmad.src_a_width};
54 const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
55
56 const bool src_a_signed{vmad.src_a_sign != 0};
57 const bool src_b_signed{vmad.src_b_sign != 0};
58 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
59 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
60
61 X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
62}
63
64} // namespace Shader::Maxwell
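
VMAD, like VMNMX and VSETP around it, relies on ExtractVideoOperandValue to pull an 8- or 16-bit lane out of a 32-bit source before operating on it. Below is a minimal, self-contained sketch of that extraction, assuming the helper behaves as a selector-indexed bit-field extract with optional sign extension; the Width enum and ExtractLane function are illustrative stand-ins, not the recompiler's own definitions.

#include <cstdint>

enum class Width { Byte, Short, Word };  // conceptual stand-in for VideoWidth (assumption)

constexpr std::uint32_t ExtractLane(std::uint32_t value, Width width, std::uint32_t selector,
                                    bool is_signed) {
    // Byte lanes: selector 0-3; half-word lanes: selector 0-1; Word uses the value whole.
    const std::uint32_t bits = width == Width::Byte ? 8u : width == Width::Short ? 16u : 32u;
    if (bits == 32u) {
        return value;
    }
    const std::uint32_t lane = (value >> (selector * bits)) & ((1u << bits) - 1u);
    if (!is_signed) {
        return lane;
    }
    const std::uint32_t sign_bit = 1u << (bits - 1u);
    return (lane ^ sign_bit) - sign_bit;  // two's-complement sign extension
}

static_assert(ExtractLane(0xAABBCCDD, Width::Byte, 1, false) == 0xCC);
static_assert(ExtractLane(0xAABBCCDD, Width::Short, 1, true) == 0xFFFFAABB);
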
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class VsetpCompareOp : u64 {
14 False = 0,
15 LessThan,
16 Equal,
17 LessThanEqual,
18 GreaterThan = 16,
19 NotEqual,
20 GreaterThanEqual,
21 True,
22};
23
24CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
25 switch (op) {
26 case VsetpCompareOp::False:
27 return CompareOp::False;
28 case VsetpCompareOp::LessThan:
29 return CompareOp::LessThan;
30 case VsetpCompareOp::Equal:
31 return CompareOp::Equal;
32 case VsetpCompareOp::LessThanEqual:
33 return CompareOp::LessThanEqual;
34 case VsetpCompareOp::GreaterThan:
35 return CompareOp::GreaterThan;
36 case VsetpCompareOp::NotEqual:
37 return CompareOp::NotEqual;
38 case VsetpCompareOp::GreaterThanEqual:
39 return CompareOp::GreaterThanEqual;
40 case VsetpCompareOp::True:
41 return CompareOp::True;
42 default:
43 throw NotImplementedException("Invalid compare op {}", op);
44 }
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::VSETP(u64 insn) {
49 union {
50 u64 raw;
51 BitField<0, 3, IR::Pred> dest_pred_b;
52 BitField<3, 3, IR::Pred> dest_pred_a;
53 BitField<20, 16, u64> src_b_imm;
54 BitField<28, 2, u64> src_b_selector;
55 BitField<29, 2, VideoWidth> src_b_width;
56 BitField<36, 2, u64> src_a_selector;
57 BitField<37, 2, VideoWidth> src_a_width;
58 BitField<39, 3, IR::Pred> bop_pred;
59 BitField<42, 1, u64> neg_bop_pred;
60 BitField<43, 5, VsetpCompareOp> compare_op;
61 BitField<45, 2, BooleanOp> bop;
62 BitField<48, 1, u64> src_a_sign;
63 BitField<49, 1, u64> src_b_sign;
64 BitField<50, 1, u64> is_src_b_reg;
65 } const vsetp{insn};
66
67 const bool is_b_imm{vsetp.is_src_b_reg == 0};
68 const IR::U32 src_a{GetReg8(insn)};
69 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
70
71 const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
72 const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
73 const VideoWidth a_width{vsetp.src_a_width};
74 const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
75
76 const bool src_a_signed{vsetp.src_a_sign != 0};
77 const bool src_b_signed{vsetp.src_b_sign != 0};
78 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
79 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
80
81 // Compare operation's sign is only dependent on operand b's sign
82 const bool compare_signed{src_b_signed};
83 const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
84 const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
85 const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
86 const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
87 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
88 ir.SetPred(vsetp.dest_pred_a, result_a);
89 ir.SetPred(vsetp.dest_pred_b, result_b);
90}
91
92} // namespace Shader::Maxwell
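
VSETP derives two predicates from a single comparison: dest_pred_a combines the comparison with bop_pred through the boolean op, while dest_pred_b combines the negated comparison the same way. A small sketch of that relationship, assuming PredicateCombine simply applies the AND/OR/XOR operation between its two predicate inputs; Combine and Bop below are illustrative stand-ins.

enum class Bop { AND, OR, XOR };  // stands in for BooleanOp (assumption)

constexpr bool Combine(bool comparison, bool pred, Bop op) {
    switch (op) {
    case Bop::AND:
        return comparison && pred;
    case Bop::OR:
        return comparison || pred;
    case Bop::XOR:
        return comparison != pred;
    }
    return false;
}

// With AND, result_a and result_b are mutually exclusive, matching the
// comparison / negated-comparison split in VSETP above.
static_assert(!(Combine(true, true, Bop::AND) && Combine(false, true, Bop::AND)));
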
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class VoteOp : u64 {
12 ALL,
13 ANY,
14 EQ,
15};
16
17[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
18 switch (vote_op) {
19 case VoteOp::ALL:
20 return ir.VoteAll(pred);
21 case VoteOp::ANY:
22 return ir.VoteAny(pred);
23 case VoteOp::EQ:
24 return ir.VoteEqual(pred);
25 default:
26 throw NotImplementedException("Invalid VOTE op {}", vote_op);
27 }
28}
29
30void Vote(TranslatorVisitor& v, u64 insn) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<39, 3, IR::Pred> pred_a;
35 BitField<42, 1, u64> neg_pred_a;
36 BitField<45, 3, IR::Pred> pred_b;
37 BitField<48, 2, VoteOp> vote_op;
38 } const vote{insn};
39
40 const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
41 v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
42 v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::VOTE(u64 insn) {
47 Vote(*this, insn);
48}
49
50void TranslatorVisitor::VOTE_vtg(u64) {
51 LOG_WARNING(Shader, "(STUBBED) called");
52}
53
54} // namespace Shader::Maxwell
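
Vote() decodes the instruction with the anonymous-union-of-BitField pattern used throughout these translators. The same layout can be read with plain shifts and masks; the sketch below mirrors the bit positions from the union (VoteFields and DecodeVote are illustrative names, not recompiler types).

#include <cstdint>

struct VoteFields {
    std::uint32_t dest_reg;  // bits [7:0]
    std::uint32_t pred_a;    // bits [41:39]
    bool neg_pred_a;         // bit  [42]
    std::uint32_t pred_b;    // bits [47:45]
    std::uint32_t vote_op;   // bits [49:48]
};

constexpr VoteFields DecodeVote(std::uint64_t insn) {
    return VoteFields{
        .dest_reg = static_cast<std::uint32_t>(insn & 0xff),
        .pred_a = static_cast<std::uint32_t>((insn >> 39) & 0x7),
        .neg_pred_a = ((insn >> 42) & 0x1) != 0,
        .pred_b = static_cast<std::uint32_t>((insn >> 45) & 0x7),
        .vote_op = static_cast<std::uint32_t>((insn >> 48) & 0x3),
    };
}

static_assert(DecodeVote(0x0003'0000'0000'00ffULL).dest_reg == 0xff);
static_assert(DecodeVote(0x0003'0000'0000'00ffULL).vote_op == 3);
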
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class ShuffleMode : u64 {
14 IDX,
15 UP,
16 DOWN,
17 BFLY,
18};
19
20[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
21 const IR::U32& index, const IR::U32& mask,
22 ShuffleMode shfl_op) {
23 const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
24 const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
25 switch (shfl_op) {
26 case ShuffleMode::IDX:
27 return ir.ShuffleIndex(value, index, clamp, seg_mask);
28 case ShuffleMode::UP:
29 return ir.ShuffleUp(value, index, clamp, seg_mask);
30 case ShuffleMode::DOWN:
31 return ir.ShuffleDown(value, index, clamp, seg_mask);
32 case ShuffleMode::BFLY:
33 return ir.ShuffleButterfly(value, index, clamp, seg_mask);
34 default:
35 throw NotImplementedException("Invalid SHFL op {}", shfl_op);
36 }
37}
38
39void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
40 union {
41 u64 insn;
42 BitField<0, 8, IR::Reg> dest_reg;
43 BitField<8, 8, IR::Reg> src_reg;
44 BitField<30, 2, ShuffleMode> mode;
45 BitField<48, 3, IR::Pred> pred;
46 } const shfl{insn};
47
48 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
49 v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
50 v.X(shfl.dest_reg, result);
51}
52} // Anonymous namespace
53
54void TranslatorVisitor::SHFL(u64 insn) {
55 union {
56 u64 insn;
57 BitField<20, 5, u64> src_a_imm;
58 BitField<28, 1, u64> src_a_flag;
59 BitField<29, 1, u64> src_b_flag;
60 BitField<34, 13, u64> src_b_imm;
61 } const flags{insn};
62 const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
63 : GetReg20(insn)};
64 const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
65 : GetReg39(insn)};
66 Shuffle(*this, insn, src_a, src_b);
67}
68
69} // namespace Shader::Maxwell
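
ShuffleOperation splits the SHFL mask operand into a lane clamp (bits [4:0]) and a segment mask (bits [12:8]) before emitting the shuffle, matching the two BitFieldExtract calls above. A short sketch of that packing, with illustrative helper names:

#include <cstdint>

constexpr std::uint32_t PackShflMask(std::uint32_t clamp, std::uint32_t seg_mask) {
    return (clamp & 0x1fu) | ((seg_mask & 0x1fu) << 8);
}

constexpr std::uint32_t ShflClamp(std::uint32_t mask) {
    return mask & 0x1fu;  // bits [4:0]
}

constexpr std::uint32_t ShflSegMask(std::uint32_t mask) {
    return (mask >> 8) & 0x1fu;  // bits [12:8]
}

static_assert(ShflClamp(PackShflMask(31, 0)) == 31);
static_assert(ShflSegMask(PackShflMask(0, 0x1c)) == 0x1c);
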
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/environment.h"
6#include "shader_recompiler/frontend/ir/basic_block.h"
7#include "shader_recompiler/frontend/maxwell/decode.h"
8#include "shader_recompiler/frontend/maxwell/location.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10#include "shader_recompiler/frontend/maxwell/translate/translate.h"
11
12namespace Shader::Maxwell {
13
14template <auto method>
15static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
16 using MethodType = decltype(method);
17 if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
18 (visitor.*method)(pc, insn);
19 } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
20 (visitor.*method)(insn);
21 } else {
22 (visitor.*method)();
23 }
24}
25
26void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
27 if (location_begin == location_end) {
28 return;
29 }
30 TranslatorVisitor visitor{env, *block};
31 for (Location pc = location_begin; pc != location_end; ++pc) {
32 const u64 insn{env.ReadInstruction(pc.Offset())};
33 try {
34 const Opcode opcode{Decode(insn)};
35 switch (opcode) {
36#define INST(name, cute, mask) \
37 case Opcode::name: \
38 Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
39 break;
40#include "shader_recompiler/frontend/maxwell/maxwell.inc"
41#undef INST
42 default:
43 throw LogicError("Invalid opcode {}", opcode);
44 }
45 } catch (Exception& exception) {
46 exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
47 throw;
48 }
49 }
50}
51
52} // namespace Shader::Maxwell
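
Invoke<> selects the call form at compile time from the handler's signature, so translator methods may take (Location, u64), (u64), or no operands without per-opcode glue in the switch. A self-contained sketch of the same technique follows; Visitor and Dispatch are illustrative, not the recompiler's types.

#include <cstdint>
#include <type_traits>

struct Visitor {
    void NOP() {}
    void MOV(std::uint64_t insn) { (void)insn; }
    void BRA(std::uint32_t pc, std::uint64_t insn) { (void)pc; (void)insn; }
};

template <auto method>
void Dispatch(Visitor& v, std::uint32_t pc, std::uint64_t insn) {
    using M = decltype(method);
    if constexpr (std::is_invocable_r_v<void, M, Visitor&, std::uint32_t, std::uint64_t>) {
        (v.*method)(pc, insn);  // handler also wants the program counter
    } else if constexpr (std::is_invocable_r_v<void, M, Visitor&, std::uint64_t>) {
        (v.*method)(insn);      // handler only needs the raw instruction word
    } else {
        (v.*method)();          // handler takes no operands
    }
}

int main() {
    Visitor v;
    Dispatch<&Visitor::NOP>(v, 0, 0);
    Dispatch<&Visitor::MOV>(v, 0, 0xcafe);
    Dispatch<&Visitor::BRA>(v, 8, 0);
}
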
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
10namespace Shader::Maxwell {
11
12void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
13
14} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <vector>
8
9#include "common/settings.h"
10#include "shader_recompiler/exception.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/post_order.h"
13#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
14#include "shader_recompiler/frontend/maxwell/translate/translate.h"
15#include "shader_recompiler/frontend/maxwell/translate_program.h"
16#include "shader_recompiler/host_translate_info.h"
17#include "shader_recompiler/ir_opt/passes.h"
18
19namespace Shader::Maxwell {
20namespace {
21IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
22 size_t num_syntax_blocks{};
23 for (const auto& node : syntax_list) {
24 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
25 ++num_syntax_blocks;
26 }
27 }
28 IR::BlockList blocks;
29 blocks.reserve(num_syntax_blocks);
30 for (const auto& node : syntax_list) {
31 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
32 blocks.push_back(node.data.block);
33 }
34 }
35 return blocks;
36}
37
38void RemoveUnreachableBlocks(IR::Program& program) {
39 // Some blocks might be unreachable if a function call exists unconditionally.
40 // If this happens, the number of blocks and post-order blocks will mismatch.
41 if (program.blocks.size() == program.post_order_blocks.size()) {
42 return;
43 }
44 const auto begin{program.blocks.begin() + 1};
45 const auto end{program.blocks.end()};
46 const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
47 program.blocks.erase(std::remove_if(begin, end, pred), end);
48}
49
50void CollectInterpolationInfo(Environment& env, IR::Program& program) {
51 if (program.stage != Stage::Fragment) {
52 return;
53 }
54 const ProgramHeader& sph{env.SPH()};
55 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
56 std::optional<PixelImap> imap;
57 for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
58 if (value == PixelImap::Unused) {
59 continue;
60 }
61 if (imap && imap != value) {
62 throw NotImplementedException("Per component interpolation");
63 }
64 imap = value;
65 }
66 if (!imap) {
67 continue;
68 }
69 program.info.interpolation[index] = [&] {
70 switch (*imap) {
71 case PixelImap::Unused:
72 case PixelImap::Perspective:
73 return Interpolation::Smooth;
74 case PixelImap::Constant:
75 return Interpolation::Flat;
76 case PixelImap::ScreenLinear:
77 return Interpolation::NoPerspective;
78 }
79 throw NotImplementedException("Unknown interpolation {}", *imap);
80 }();
81 }
82}
83
84void AddNVNStorageBuffers(IR::Program& program) {
85 if (!program.info.uses_global_memory) {
86 return;
87 }
88 const u32 driver_cbuf{0};
89 const u32 descriptor_size{0x10};
90 const u32 num_buffers{16};
91 const u32 base{[&] {
92 switch (program.stage) {
93 case Stage::VertexA:
94 case Stage::VertexB:
95 return 0x110u;
96 case Stage::TessellationControl:
97 return 0x210u;
98 case Stage::TessellationEval:
99 return 0x310u;
100 case Stage::Geometry:
101 return 0x410u;
102 case Stage::Fragment:
103 return 0x510u;
104 case Stage::Compute:
105 return 0x310u;
106 }
107 throw InvalidArgument("Invalid stage {}", program.stage);
108 }()};
109 auto& descs{program.info.storage_buffers_descriptors};
110 for (u32 index = 0; index < num_buffers; ++index) {
111 if (!program.info.nvn_buffer_used[index]) {
112 continue;
113 }
114 const u32 offset{base + index * descriptor_size};
115 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
116 if (it != descs.end()) {
117 it->is_written |= program.info.stores_global_memory;
118 continue;
119 }
120 descs.push_back({
121 .cbuf_index = driver_cbuf,
122 .cbuf_offset = offset,
123 .count = 1,
124 .is_written = program.info.stores_global_memory,
125 });
126 }
127}
128} // Anonymous namespace
129
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage();
137 program.local_memory_size = env.LocalMemorySize();
138 switch (program.stage) {
139 case Stage::TessellationControl: {
140 const ProgramHeader& sph{env.SPH()};
141 program.invocations = sph.common2.threads_per_input_primitive;
142 break;
143 }
144 case Stage::Geometry: {
145 const ProgramHeader& sph{env.SPH()};
146 program.output_topology = sph.common3.output_topology;
147 program.output_vertices = sph.common4.max_output_vertices;
148 program.invocations = sph.common2.threads_per_input_primitive;
149 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
150 if (program.is_geometry_passthrough) {
151 const auto& mask{env.GpPassthroughMask()};
152 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
153 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
154 }
155 }
156 break;
157 }
158 case Stage::Compute:
159 program.workgroup_size = env.WorkgroupSize();
160 program.shared_memory_size = env.SharedMemorySize();
161 break;
162 default:
163 break;
164 }
165 RemoveUnreachableBlocks(program);
166
167 // Replace instructions before the SSA rewrite
168 if (!host_info.support_float16) {
169 Optimization::LowerFp16ToFp32(program);
170 }
171 if (!host_info.support_int64) {
172 Optimization::LowerInt64ToInt32(program);
173 }
174 Optimization::SsaRewritePass(program);
175
176 Optimization::GlobalMemoryToStorageBufferPass(program);
177 Optimization::TexturePass(env, program);
178
179 Optimization::ConstantPropagationPass(program);
180 Optimization::DeadCodeEliminationPass(program);
181 if (Settings::values.renderer_debug) {
182 Optimization::VerificationPass(program);
183 }
184 Optimization::CollectShaderInfoPass(env, program);
185 CollectInterpolationInfo(env, program);
186 AddNVNStorageBuffers(program);
187 return program;
188}
189
190IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
191 Environment& env_vertex_b) {
192 IR::Program result{};
193 Optimization::VertexATransformPass(vertex_a);
194 Optimization::VertexBTransformPass(vertex_b);
195 for (const auto& term : vertex_a.syntax_list) {
196 if (term.type != IR::AbstractSyntaxNode::Type::Return) {
197 result.syntax_list.push_back(term);
198 }
199 }
200 result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
201 vertex_b.syntax_list.end());
202 result.blocks = GenerateBlocks(result.syntax_list);
203 result.post_order_blocks = vertex_b.post_order_blocks;
204 for (const auto& block : vertex_a.post_order_blocks) {
205 result.post_order_blocks.push_back(block);
206 }
207 result.stage = Stage::VertexB;
208 result.info = vertex_a.info;
209 result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
210 result.info.loads.mask |= vertex_b.info.loads.mask;
211 result.info.stores.mask |= vertex_b.info.stores.mask;
212
213 Optimization::JoinTextureInfo(result.info, vertex_b.info);
214 Optimization::JoinStorageInfo(result.info, vertex_b.info);
215 Optimization::DeadCodeEliminationPass(result);
216 if (Settings::values.renderer_debug) {
217 Optimization::VerificationPass(result);
218 }
219 Optimization::CollectShaderInfoPass(env_vertex_b, result);
220 return result;
221}
222
223} // namespace Shader::Maxwell
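
AddNVNStorageBuffers maps the NVN driver's storage-buffer table onto constant buffer 0: descriptors are 0x10 bytes apart, starting at a per-stage base (0x110 for vertex, 0x510 for fragment, and so on, as listed above). A worked example of that addressing; the helper name is illustrative.

#include <cstdint>

constexpr std::uint32_t NvnDescriptorOffset(std::uint32_t stage_base, std::uint32_t index) {
    constexpr std::uint32_t descriptor_size = 0x10;  // bytes between descriptors
    return stage_base + index * descriptor_size;
}

// Fragment stage (base 0x510): slot 0 -> 0x510, slot 3 -> 0x540, slot 15 -> 0x600.
static_assert(NvnDescriptorOffset(0x510, 0) == 0x510);
static_assert(NvnDescriptorOffset(0x510, 3) == 0x540);
static_assert(NvnDescriptorOffset(0x510, 15) == 0x600);
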
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/host_translate_info.h"
12#include "shader_recompiler/object_pool.h"
13
14namespace Shader::Maxwell {
15
16[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg, const HostTranslateInfo& host_info);
19
20[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
21 Environment& env_vertex_b);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
new file mode 100644
index 000000000..94a584219
--- /dev/null
+++ b/src/shader_recompiler/host_translate_info.h
@@ -0,0 +1,18 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Shader {
8
9// Try to keep entries here to a minimum
10// They can accidentally change the cached information in a shader
11
12/// Misc information about the host
13struct HostTranslateInfo {
14 bool support_float16{}; ///< True when the device supports 16-bit floats
15 bool support_int64{}; ///< True when the device supports 64-bit integers
16};
17
18} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..5ead930f1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,928 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/alignment.h"
6#include "shader_recompiler/environment.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/ir/program.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/ir_opt/passes.h"
11#include "shader_recompiler/shader_info.h"
12
13namespace Shader::Optimization {
14namespace {
15void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
16 if (count != 1) {
17 throw NotImplementedException("Constant buffer descriptor indexing");
18 }
19 if ((info.constant_buffer_mask & (1U << index)) != 0) {
20 return;
21 }
22 info.constant_buffer_mask |= 1U << index;
23
24 auto& cbufs{info.constant_buffer_descriptors};
25 cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
26 ConstantBufferDescriptor{
27 .index = index,
28 .count = 1,
29 });
30}
31
32void GetPatch(Info& info, IR::Patch patch) {
33 if (!IR::IsGeneric(patch)) {
34 throw NotImplementedException("Reading non-generic patch {}", patch);
35 }
36 info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
37}
38
39void SetPatch(Info& info, IR::Patch patch) {
40 if (IR::IsGeneric(patch)) {
41 info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
42 return;
43 }
44 switch (patch) {
45 case IR::Patch::TessellationLodLeft:
46 case IR::Patch::TessellationLodTop:
47 case IR::Patch::TessellationLodRight:
48 case IR::Patch::TessellationLodBottom:
49 info.stores_tess_level_outer = true;
50 break;
51 case IR::Patch::TessellationLodInteriorU:
52 case IR::Patch::TessellationLodInteriorV:
53 info.stores_tess_level_inner = true;
54 break;
55 default:
56 throw NotImplementedException("Set patch {}", patch);
57 }
58}
59
60void CheckCBufNVN(Info& info, IR::Inst& inst) {
61 const IR::Value cbuf_index{inst.Arg(0)};
62 if (!cbuf_index.IsImmediate()) {
63 info.nvn_buffer_used.set();
64 return;
65 }
66 const u32 index{cbuf_index.U32()};
67 if (index != 0) {
68 return;
69 }
70 const IR::Value cbuf_offset{inst.Arg(1)};
71 if (!cbuf_offset.IsImmediate()) {
72 info.nvn_buffer_used.set();
73 return;
74 }
75 const u32 offset{cbuf_offset.U32()};
76 const u32 descriptor_size{0x10};
77 const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
78 if (offset >= info.nvn_buffer_base && offset < upper_limit) {
79 const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
80 info.nvn_buffer_used.set(nvn_index, true);
81 }
82}
83
84void VisitUsages(Info& info, IR::Inst& inst) {
85 switch (inst.GetOpcode()) {
86 case IR::Opcode::CompositeConstructF16x2:
87 case IR::Opcode::CompositeConstructF16x3:
88 case IR::Opcode::CompositeConstructF16x4:
89 case IR::Opcode::CompositeExtractF16x2:
90 case IR::Opcode::CompositeExtractF16x3:
91 case IR::Opcode::CompositeExtractF16x4:
92 case IR::Opcode::CompositeInsertF16x2:
93 case IR::Opcode::CompositeInsertF16x3:
94 case IR::Opcode::CompositeInsertF16x4:
95 case IR::Opcode::SelectF16:
96 case IR::Opcode::BitCastU16F16:
97 case IR::Opcode::BitCastF16U16:
98 case IR::Opcode::PackFloat2x16:
99 case IR::Opcode::UnpackFloat2x16:
100 case IR::Opcode::ConvertS16F16:
101 case IR::Opcode::ConvertS32F16:
102 case IR::Opcode::ConvertS64F16:
103 case IR::Opcode::ConvertU16F16:
104 case IR::Opcode::ConvertU32F16:
105 case IR::Opcode::ConvertU64F16:
106 case IR::Opcode::ConvertF16S8:
107 case IR::Opcode::ConvertF16S16:
108 case IR::Opcode::ConvertF16S32:
109 case IR::Opcode::ConvertF16S64:
110 case IR::Opcode::ConvertF16U8:
111 case IR::Opcode::ConvertF16U16:
112 case IR::Opcode::ConvertF16U32:
113 case IR::Opcode::ConvertF16U64:
114 case IR::Opcode::FPAbs16:
115 case IR::Opcode::FPAdd16:
116 case IR::Opcode::FPCeil16:
117 case IR::Opcode::FPFloor16:
118 case IR::Opcode::FPFma16:
119 case IR::Opcode::FPMul16:
120 case IR::Opcode::FPNeg16:
121 case IR::Opcode::FPRoundEven16:
122 case IR::Opcode::FPSaturate16:
123 case IR::Opcode::FPClamp16:
124 case IR::Opcode::FPTrunc16:
125 case IR::Opcode::FPOrdEqual16:
126 case IR::Opcode::FPUnordEqual16:
127 case IR::Opcode::FPOrdNotEqual16:
128 case IR::Opcode::FPUnordNotEqual16:
129 case IR::Opcode::FPOrdLessThan16:
130 case IR::Opcode::FPUnordLessThan16:
131 case IR::Opcode::FPOrdGreaterThan16:
132 case IR::Opcode::FPUnordGreaterThan16:
133 case IR::Opcode::FPOrdLessThanEqual16:
134 case IR::Opcode::FPUnordLessThanEqual16:
135 case IR::Opcode::FPOrdGreaterThanEqual16:
136 case IR::Opcode::FPUnordGreaterThanEqual16:
137 case IR::Opcode::FPIsNan16:
138 case IR::Opcode::GlobalAtomicAddF16x2:
139 case IR::Opcode::GlobalAtomicMinF16x2:
140 case IR::Opcode::GlobalAtomicMaxF16x2:
141 case IR::Opcode::StorageAtomicAddF16x2:
142 case IR::Opcode::StorageAtomicMinF16x2:
143 case IR::Opcode::StorageAtomicMaxF16x2:
144 info.uses_fp16 = true;
145 break;
146 case IR::Opcode::CompositeConstructF64x2:
147 case IR::Opcode::CompositeConstructF64x3:
148 case IR::Opcode::CompositeConstructF64x4:
149 case IR::Opcode::CompositeExtractF64x2:
150 case IR::Opcode::CompositeExtractF64x3:
151 case IR::Opcode::CompositeExtractF64x4:
152 case IR::Opcode::CompositeInsertF64x2:
153 case IR::Opcode::CompositeInsertF64x3:
154 case IR::Opcode::CompositeInsertF64x4:
155 case IR::Opcode::SelectF64:
156 case IR::Opcode::BitCastU64F64:
157 case IR::Opcode::BitCastF64U64:
158 case IR::Opcode::PackDouble2x32:
159 case IR::Opcode::UnpackDouble2x32:
160 case IR::Opcode::FPAbs64:
161 case IR::Opcode::FPAdd64:
162 case IR::Opcode::FPCeil64:
163 case IR::Opcode::FPFloor64:
164 case IR::Opcode::FPFma64:
165 case IR::Opcode::FPMax64:
166 case IR::Opcode::FPMin64:
167 case IR::Opcode::FPMul64:
168 case IR::Opcode::FPNeg64:
169 case IR::Opcode::FPRecip64:
170 case IR::Opcode::FPRecipSqrt64:
171 case IR::Opcode::FPRoundEven64:
172 case IR::Opcode::FPSaturate64:
173 case IR::Opcode::FPClamp64:
174 case IR::Opcode::FPTrunc64:
175 case IR::Opcode::FPOrdEqual64:
176 case IR::Opcode::FPUnordEqual64:
177 case IR::Opcode::FPOrdNotEqual64:
178 case IR::Opcode::FPUnordNotEqual64:
179 case IR::Opcode::FPOrdLessThan64:
180 case IR::Opcode::FPUnordLessThan64:
181 case IR::Opcode::FPOrdGreaterThan64:
182 case IR::Opcode::FPUnordGreaterThan64:
183 case IR::Opcode::FPOrdLessThanEqual64:
184 case IR::Opcode::FPUnordLessThanEqual64:
185 case IR::Opcode::FPOrdGreaterThanEqual64:
186 case IR::Opcode::FPUnordGreaterThanEqual64:
187 case IR::Opcode::FPIsNan64:
188 case IR::Opcode::ConvertS16F64:
189 case IR::Opcode::ConvertS32F64:
190 case IR::Opcode::ConvertS64F64:
191 case IR::Opcode::ConvertU16F64:
192 case IR::Opcode::ConvertU32F64:
193 case IR::Opcode::ConvertU64F64:
194 case IR::Opcode::ConvertF32F64:
195 case IR::Opcode::ConvertF64F32:
196 case IR::Opcode::ConvertF64S8:
197 case IR::Opcode::ConvertF64S16:
198 case IR::Opcode::ConvertF64S32:
199 case IR::Opcode::ConvertF64S64:
200 case IR::Opcode::ConvertF64U8:
201 case IR::Opcode::ConvertF64U16:
202 case IR::Opcode::ConvertF64U32:
203 case IR::Opcode::ConvertF64U64:
204 info.uses_fp64 = true;
205 break;
206 default:
207 break;
208 }
209 switch (inst.GetOpcode()) {
210 case IR::Opcode::GetCbufU8:
211 case IR::Opcode::GetCbufS8:
212 case IR::Opcode::UndefU8:
213 case IR::Opcode::LoadGlobalU8:
214 case IR::Opcode::LoadGlobalS8:
215 case IR::Opcode::WriteGlobalU8:
216 case IR::Opcode::WriteGlobalS8:
217 case IR::Opcode::LoadStorageU8:
218 case IR::Opcode::LoadStorageS8:
219 case IR::Opcode::WriteStorageU8:
220 case IR::Opcode::WriteStorageS8:
221 case IR::Opcode::LoadSharedU8:
222 case IR::Opcode::LoadSharedS8:
223 case IR::Opcode::WriteSharedU8:
224 case IR::Opcode::SelectU8:
225 case IR::Opcode::ConvertF16S8:
226 case IR::Opcode::ConvertF16U8:
227 case IR::Opcode::ConvertF32S8:
228 case IR::Opcode::ConvertF32U8:
229 case IR::Opcode::ConvertF64S8:
230 case IR::Opcode::ConvertF64U8:
231 info.uses_int8 = true;
232 break;
233 default:
234 break;
235 }
236 switch (inst.GetOpcode()) {
237 case IR::Opcode::GetCbufU16:
238 case IR::Opcode::GetCbufS16:
239 case IR::Opcode::UndefU16:
240 case IR::Opcode::LoadGlobalU16:
241 case IR::Opcode::LoadGlobalS16:
242 case IR::Opcode::WriteGlobalU16:
243 case IR::Opcode::WriteGlobalS16:
244 case IR::Opcode::LoadStorageU16:
245 case IR::Opcode::LoadStorageS16:
246 case IR::Opcode::WriteStorageU16:
247 case IR::Opcode::WriteStorageS16:
248 case IR::Opcode::LoadSharedU16:
249 case IR::Opcode::LoadSharedS16:
250 case IR::Opcode::WriteSharedU16:
251 case IR::Opcode::SelectU16:
252 case IR::Opcode::BitCastU16F16:
253 case IR::Opcode::BitCastF16U16:
254 case IR::Opcode::ConvertS16F16:
255 case IR::Opcode::ConvertS16F32:
256 case IR::Opcode::ConvertS16F64:
257 case IR::Opcode::ConvertU16F16:
258 case IR::Opcode::ConvertU16F32:
259 case IR::Opcode::ConvertU16F64:
260 case IR::Opcode::ConvertF16S16:
261 case IR::Opcode::ConvertF16U16:
262 case IR::Opcode::ConvertF32S16:
263 case IR::Opcode::ConvertF32U16:
264 case IR::Opcode::ConvertF64S16:
265 case IR::Opcode::ConvertF64U16:
266 info.uses_int16 = true;
267 break;
268 default:
269 break;
270 }
271 switch (inst.GetOpcode()) {
272 case IR::Opcode::UndefU64:
273 case IR::Opcode::LoadGlobalU8:
274 case IR::Opcode::LoadGlobalS8:
275 case IR::Opcode::LoadGlobalU16:
276 case IR::Opcode::LoadGlobalS16:
277 case IR::Opcode::LoadGlobal32:
278 case IR::Opcode::LoadGlobal64:
279 case IR::Opcode::LoadGlobal128:
280 case IR::Opcode::WriteGlobalU8:
281 case IR::Opcode::WriteGlobalS8:
282 case IR::Opcode::WriteGlobalU16:
283 case IR::Opcode::WriteGlobalS16:
284 case IR::Opcode::WriteGlobal32:
285 case IR::Opcode::WriteGlobal64:
286 case IR::Opcode::WriteGlobal128:
287 case IR::Opcode::SelectU64:
288 case IR::Opcode::BitCastU64F64:
289 case IR::Opcode::BitCastF64U64:
290 case IR::Opcode::PackUint2x32:
291 case IR::Opcode::UnpackUint2x32:
292 case IR::Opcode::IAdd64:
293 case IR::Opcode::ISub64:
294 case IR::Opcode::INeg64:
295 case IR::Opcode::ShiftLeftLogical64:
296 case IR::Opcode::ShiftRightLogical64:
297 case IR::Opcode::ShiftRightArithmetic64:
298 case IR::Opcode::ConvertS64F16:
299 case IR::Opcode::ConvertS64F32:
300 case IR::Opcode::ConvertS64F64:
301 case IR::Opcode::ConvertU64F16:
302 case IR::Opcode::ConvertU64F32:
303 case IR::Opcode::ConvertU64F64:
304 case IR::Opcode::ConvertU64U32:
305 case IR::Opcode::ConvertU32U64:
306 case IR::Opcode::ConvertF16U64:
307 case IR::Opcode::ConvertF32U64:
308 case IR::Opcode::ConvertF64U64:
309 case IR::Opcode::SharedAtomicExchange64:
310 case IR::Opcode::GlobalAtomicIAdd64:
311 case IR::Opcode::GlobalAtomicSMin64:
312 case IR::Opcode::GlobalAtomicUMin64:
313 case IR::Opcode::GlobalAtomicSMax64:
314 case IR::Opcode::GlobalAtomicUMax64:
315 case IR::Opcode::GlobalAtomicAnd64:
316 case IR::Opcode::GlobalAtomicOr64:
317 case IR::Opcode::GlobalAtomicXor64:
318 case IR::Opcode::GlobalAtomicExchange64:
319 case IR::Opcode::StorageAtomicIAdd64:
320 case IR::Opcode::StorageAtomicSMin64:
321 case IR::Opcode::StorageAtomicUMin64:
322 case IR::Opcode::StorageAtomicSMax64:
323 case IR::Opcode::StorageAtomicUMax64:
324 case IR::Opcode::StorageAtomicAnd64:
325 case IR::Opcode::StorageAtomicOr64:
326 case IR::Opcode::StorageAtomicXor64:
327 case IR::Opcode::StorageAtomicExchange64:
328 info.uses_int64 = true;
329 break;
330 default:
331 break;
332 }
333 switch (inst.GetOpcode()) {
334 case IR::Opcode::WriteGlobalU8:
335 case IR::Opcode::WriteGlobalS8:
336 case IR::Opcode::WriteGlobalU16:
337 case IR::Opcode::WriteGlobalS16:
338 case IR::Opcode::WriteGlobal32:
339 case IR::Opcode::WriteGlobal64:
340 case IR::Opcode::WriteGlobal128:
341 case IR::Opcode::GlobalAtomicIAdd32:
342 case IR::Opcode::GlobalAtomicSMin32:
343 case IR::Opcode::GlobalAtomicUMin32:
344 case IR::Opcode::GlobalAtomicSMax32:
345 case IR::Opcode::GlobalAtomicUMax32:
346 case IR::Opcode::GlobalAtomicInc32:
347 case IR::Opcode::GlobalAtomicDec32:
348 case IR::Opcode::GlobalAtomicAnd32:
349 case IR::Opcode::GlobalAtomicOr32:
350 case IR::Opcode::GlobalAtomicXor32:
351 case IR::Opcode::GlobalAtomicExchange32:
352 case IR::Opcode::GlobalAtomicIAdd64:
353 case IR::Opcode::GlobalAtomicSMin64:
354 case IR::Opcode::GlobalAtomicUMin64:
355 case IR::Opcode::GlobalAtomicSMax64:
356 case IR::Opcode::GlobalAtomicUMax64:
357 case IR::Opcode::GlobalAtomicAnd64:
358 case IR::Opcode::GlobalAtomicOr64:
359 case IR::Opcode::GlobalAtomicXor64:
360 case IR::Opcode::GlobalAtomicExchange64:
361 case IR::Opcode::GlobalAtomicAddF32:
362 case IR::Opcode::GlobalAtomicAddF16x2:
363 case IR::Opcode::GlobalAtomicAddF32x2:
364 case IR::Opcode::GlobalAtomicMinF16x2:
365 case IR::Opcode::GlobalAtomicMinF32x2:
366 case IR::Opcode::GlobalAtomicMaxF16x2:
367 case IR::Opcode::GlobalAtomicMaxF32x2:
368 info.stores_global_memory = true;
369 [[fallthrough]];
370 case IR::Opcode::LoadGlobalU8:
371 case IR::Opcode::LoadGlobalS8:
372 case IR::Opcode::LoadGlobalU16:
373 case IR::Opcode::LoadGlobalS16:
374 case IR::Opcode::LoadGlobal32:
375 case IR::Opcode::LoadGlobal64:
376 case IR::Opcode::LoadGlobal128:
377 info.uses_int64 = true;
378 info.uses_global_memory = true;
379 info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
380 info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
381 break;
382 default:
383 break;
384 }
385 switch (inst.GetOpcode()) {
386 case IR::Opcode::DemoteToHelperInvocation:
387 info.uses_demote_to_helper_invocation = true;
388 break;
389 case IR::Opcode::GetAttribute:
390 info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
391 break;
392 case IR::Opcode::SetAttribute:
393 info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
394 break;
395 case IR::Opcode::GetPatch:
396 GetPatch(info, inst.Arg(0).Patch());
397 break;
398 case IR::Opcode::SetPatch:
399 SetPatch(info, inst.Arg(0).Patch());
400 break;
401 case IR::Opcode::GetAttributeIndexed:
402 info.loads_indexed_attributes = true;
403 break;
404 case IR::Opcode::SetAttributeIndexed:
405 info.stores_indexed_attributes = true;
406 break;
407 case IR::Opcode::SetFragColor:
408 info.stores_frag_color[inst.Arg(0).U32()] = true;
409 break;
410 case IR::Opcode::SetSampleMask:
411 info.stores_sample_mask = true;
412 break;
413 case IR::Opcode::SetFragDepth:
414 info.stores_frag_depth = true;
415 break;
416 case IR::Opcode::WorkgroupId:
417 info.uses_workgroup_id = true;
418 break;
419 case IR::Opcode::LocalInvocationId:
420 info.uses_local_invocation_id = true;
421 break;
422 case IR::Opcode::InvocationId:
423 info.uses_invocation_id = true;
424 break;
425 case IR::Opcode::SampleId:
426 info.uses_sample_id = true;
427 break;
428 case IR::Opcode::IsHelperInvocation:
429 info.uses_is_helper_invocation = true;
430 break;
431 case IR::Opcode::LaneId:
432 info.uses_subgroup_invocation_id = true;
433 break;
434 case IR::Opcode::ShuffleIndex:
435 case IR::Opcode::ShuffleUp:
436 case IR::Opcode::ShuffleDown:
437 case IR::Opcode::ShuffleButterfly:
438 info.uses_subgroup_shuffles = true;
439 break;
440 case IR::Opcode::GetCbufU8:
441 case IR::Opcode::GetCbufS8:
442 case IR::Opcode::GetCbufU16:
443 case IR::Opcode::GetCbufS16:
444 case IR::Opcode::GetCbufU32:
445 case IR::Opcode::GetCbufF32:
446 case IR::Opcode::GetCbufU32x2: {
447 const IR::Value index{inst.Arg(0)};
448 const IR::Value offset{inst.Arg(1)};
449 if (!index.IsImmediate()) {
450 throw NotImplementedException("Constant buffer with non-immediate index");
451 }
452 AddConstantBufferDescriptor(info, index.U32(), 1);
453 u32 element_size{};
454 switch (inst.GetOpcode()) {
455 case IR::Opcode::GetCbufU8:
456 case IR::Opcode::GetCbufS8:
457 info.used_constant_buffer_types |= IR::Type::U8;
458 element_size = 1;
459 break;
460 case IR::Opcode::GetCbufU16:
461 case IR::Opcode::GetCbufS16:
462 info.used_constant_buffer_types |= IR::Type::U16;
463 element_size = 2;
464 break;
465 case IR::Opcode::GetCbufU32:
466 info.used_constant_buffer_types |= IR::Type::U32;
467 element_size = 4;
468 break;
469 case IR::Opcode::GetCbufF32:
470 info.used_constant_buffer_types |= IR::Type::F32;
471 element_size = 4;
472 break;
473 case IR::Opcode::GetCbufU32x2:
474 info.used_constant_buffer_types |= IR::Type::U32x2;
475 element_size = 8;
476 break;
477 default:
478 break;
479 }
480 u32& size{info.constant_buffer_used_sizes[index.U32()]};
481 if (offset.IsImmediate()) {
482 size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
483 } else {
484 size = 0x10'000;
485 }
486 break;
487 }
488 case IR::Opcode::BindlessImageSampleImplicitLod:
489 case IR::Opcode::BindlessImageSampleExplicitLod:
490 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
491 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
492 case IR::Opcode::BindlessImageGather:
493 case IR::Opcode::BindlessImageGatherDref:
494 case IR::Opcode::BindlessImageFetch:
495 case IR::Opcode::BindlessImageQueryDimensions:
496 case IR::Opcode::BindlessImageQueryLod:
497 case IR::Opcode::BindlessImageGradient:
498 case IR::Opcode::BoundImageSampleImplicitLod:
499 case IR::Opcode::BoundImageSampleExplicitLod:
500 case IR::Opcode::BoundImageSampleDrefImplicitLod:
501 case IR::Opcode::BoundImageSampleDrefExplicitLod:
502 case IR::Opcode::BoundImageGather:
503 case IR::Opcode::BoundImageGatherDref:
504 case IR::Opcode::BoundImageFetch:
505 case IR::Opcode::BoundImageQueryDimensions:
506 case IR::Opcode::BoundImageQueryLod:
507 case IR::Opcode::BoundImageGradient:
508 case IR::Opcode::ImageGather:
509 case IR::Opcode::ImageGatherDref:
510 case IR::Opcode::ImageFetch:
511 case IR::Opcode::ImageQueryDimensions:
512 case IR::Opcode::ImageGradient: {
513 const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
514 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
515 info.uses_sparse_residency |=
516 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
517 break;
518 }
519 case IR::Opcode::ImageSampleImplicitLod:
520 case IR::Opcode::ImageSampleExplicitLod:
521 case IR::Opcode::ImageSampleDrefImplicitLod:
522 case IR::Opcode::ImageSampleDrefExplicitLod:
523 case IR::Opcode::ImageQueryLod: {
524 const auto flags{inst.Flags<IR::TextureInstInfo>()};
525 const TextureType type{flags.type};
526 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
527 info.uses_shadow_lod |= flags.is_depth != 0;
528 info.uses_sparse_residency |=
529 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
530 break;
531 }
532 case IR::Opcode::ImageRead: {
533 const auto flags{inst.Flags<IR::TextureInstInfo>()};
534 info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless;
535 info.uses_sparse_residency |=
536 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
537 break;
538 }
539 case IR::Opcode::ImageWrite: {
540 const auto flags{inst.Flags<IR::TextureInstInfo>()};
541 info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
542 info.uses_image_buffers |= flags.type == TextureType::Buffer;
543 break;
544 }
545 case IR::Opcode::SubgroupEqMask:
546 case IR::Opcode::SubgroupLtMask:
547 case IR::Opcode::SubgroupLeMask:
548 case IR::Opcode::SubgroupGtMask:
549 case IR::Opcode::SubgroupGeMask:
550 info.uses_subgroup_mask = true;
551 break;
552 case IR::Opcode::VoteAll:
553 case IR::Opcode::VoteAny:
554 case IR::Opcode::VoteEqual:
555 case IR::Opcode::SubgroupBallot:
556 info.uses_subgroup_vote = true;
557 break;
558 case IR::Opcode::FSwizzleAdd:
559 info.uses_fswzadd = true;
560 break;
561 case IR::Opcode::DPdxFine:
562 case IR::Opcode::DPdyFine:
563 case IR::Opcode::DPdxCoarse:
564 case IR::Opcode::DPdyCoarse:
565 info.uses_derivatives = true;
566 break;
567 case IR::Opcode::LoadStorageU8:
568 case IR::Opcode::LoadStorageS8:
569 case IR::Opcode::WriteStorageU8:
570 case IR::Opcode::WriteStorageS8:
571 info.used_storage_buffer_types |= IR::Type::U8;
572 break;
573 case IR::Opcode::LoadStorageU16:
574 case IR::Opcode::LoadStorageS16:
575 case IR::Opcode::WriteStorageU16:
576 case IR::Opcode::WriteStorageS16:
577 info.used_storage_buffer_types |= IR::Type::U16;
578 break;
579 case IR::Opcode::LoadStorage32:
580 case IR::Opcode::WriteStorage32:
581 case IR::Opcode::StorageAtomicIAdd32:
582 case IR::Opcode::StorageAtomicUMin32:
583 case IR::Opcode::StorageAtomicUMax32:
584 case IR::Opcode::StorageAtomicAnd32:
585 case IR::Opcode::StorageAtomicOr32:
586 case IR::Opcode::StorageAtomicXor32:
587 case IR::Opcode::StorageAtomicExchange32:
588 info.used_storage_buffer_types |= IR::Type::U32;
589 break;
590 case IR::Opcode::LoadStorage64:
591 case IR::Opcode::WriteStorage64:
592 info.used_storage_buffer_types |= IR::Type::U32x2;
593 break;
594 case IR::Opcode::LoadStorage128:
595 case IR::Opcode::WriteStorage128:
596 info.used_storage_buffer_types |= IR::Type::U32x4;
597 break;
598 case IR::Opcode::SharedAtomicSMin32:
599 info.uses_atomic_s32_min = true;
600 break;
601 case IR::Opcode::SharedAtomicSMax32:
602 info.uses_atomic_s32_max = true;
603 break;
604 case IR::Opcode::SharedAtomicInc32:
605 info.uses_shared_increment = true;
606 break;
607 case IR::Opcode::SharedAtomicDec32:
608 info.uses_shared_decrement = true;
609 break;
610 case IR::Opcode::SharedAtomicExchange64:
611 info.uses_int64_bit_atomics = true;
612 break;
613 case IR::Opcode::GlobalAtomicInc32:
614 case IR::Opcode::StorageAtomicInc32:
615 info.used_storage_buffer_types |= IR::Type::U32;
616 info.uses_global_increment = true;
617 break;
618 case IR::Opcode::GlobalAtomicDec32:
619 case IR::Opcode::StorageAtomicDec32:
620 info.used_storage_buffer_types |= IR::Type::U32;
621 info.uses_global_decrement = true;
622 break;
623 case IR::Opcode::GlobalAtomicAddF32:
624 case IR::Opcode::StorageAtomicAddF32:
625 info.used_storage_buffer_types |= IR::Type::U32;
626 info.uses_atomic_f32_add = true;
627 break;
628 case IR::Opcode::GlobalAtomicAddF16x2:
629 case IR::Opcode::StorageAtomicAddF16x2:
630 info.used_storage_buffer_types |= IR::Type::U32;
631 info.uses_atomic_f16x2_add = true;
632 break;
633 case IR::Opcode::GlobalAtomicAddF32x2:
634 case IR::Opcode::StorageAtomicAddF32x2:
635 info.used_storage_buffer_types |= IR::Type::U32;
636 info.uses_atomic_f32x2_add = true;
637 break;
638 case IR::Opcode::GlobalAtomicMinF16x2:
639 case IR::Opcode::StorageAtomicMinF16x2:
640 info.used_storage_buffer_types |= IR::Type::U32;
641 info.uses_atomic_f16x2_min = true;
642 break;
643 case IR::Opcode::GlobalAtomicMinF32x2:
644 case IR::Opcode::StorageAtomicMinF32x2:
645 info.used_storage_buffer_types |= IR::Type::U32;
646 info.uses_atomic_f32x2_min = true;
647 break;
648 case IR::Opcode::GlobalAtomicMaxF16x2:
649 case IR::Opcode::StorageAtomicMaxF16x2:
650 info.used_storage_buffer_types |= IR::Type::U32;
651 info.uses_atomic_f16x2_max = true;
652 break;
653 case IR::Opcode::GlobalAtomicMaxF32x2:
654 case IR::Opcode::StorageAtomicMaxF32x2:
655 info.used_storage_buffer_types |= IR::Type::U32;
656 info.uses_atomic_f32x2_max = true;
657 break;
658 case IR::Opcode::StorageAtomicSMin32:
659 info.used_storage_buffer_types |= IR::Type::U32;
660 info.uses_atomic_s32_min = true;
661 break;
662 case IR::Opcode::StorageAtomicSMax32:
663 info.used_storage_buffer_types |= IR::Type::U32;
664 info.uses_atomic_s32_max = true;
665 break;
666 case IR::Opcode::GlobalAtomicIAdd64:
667 case IR::Opcode::GlobalAtomicSMin64:
668 case IR::Opcode::GlobalAtomicUMin64:
669 case IR::Opcode::GlobalAtomicSMax64:
670 case IR::Opcode::GlobalAtomicUMax64:
671 case IR::Opcode::GlobalAtomicAnd64:
672 case IR::Opcode::GlobalAtomicOr64:
673 case IR::Opcode::GlobalAtomicXor64:
674 case IR::Opcode::GlobalAtomicExchange64:
675 case IR::Opcode::StorageAtomicIAdd64:
676 case IR::Opcode::StorageAtomicSMin64:
677 case IR::Opcode::StorageAtomicUMin64:
678 case IR::Opcode::StorageAtomicSMax64:
679 case IR::Opcode::StorageAtomicUMax64:
680 case IR::Opcode::StorageAtomicAnd64:
681 case IR::Opcode::StorageAtomicOr64:
682 case IR::Opcode::StorageAtomicXor64:
683 info.used_storage_buffer_types |= IR::Type::U64;
684 info.uses_int64_bit_atomics = true;
685 break;
686 case IR::Opcode::BindlessImageAtomicIAdd32:
687 case IR::Opcode::BindlessImageAtomicSMin32:
688 case IR::Opcode::BindlessImageAtomicUMin32:
689 case IR::Opcode::BindlessImageAtomicSMax32:
690 case IR::Opcode::BindlessImageAtomicUMax32:
691 case IR::Opcode::BindlessImageAtomicInc32:
692 case IR::Opcode::BindlessImageAtomicDec32:
693 case IR::Opcode::BindlessImageAtomicAnd32:
694 case IR::Opcode::BindlessImageAtomicOr32:
695 case IR::Opcode::BindlessImageAtomicXor32:
696 case IR::Opcode::BindlessImageAtomicExchange32:
697 case IR::Opcode::BoundImageAtomicIAdd32:
698 case IR::Opcode::BoundImageAtomicSMin32:
699 case IR::Opcode::BoundImageAtomicUMin32:
700 case IR::Opcode::BoundImageAtomicSMax32:
701 case IR::Opcode::BoundImageAtomicUMax32:
702 case IR::Opcode::BoundImageAtomicInc32:
703 case IR::Opcode::BoundImageAtomicDec32:
704 case IR::Opcode::BoundImageAtomicAnd32:
705 case IR::Opcode::BoundImageAtomicOr32:
706 case IR::Opcode::BoundImageAtomicXor32:
707 case IR::Opcode::BoundImageAtomicExchange32:
708 case IR::Opcode::ImageAtomicIAdd32:
709 case IR::Opcode::ImageAtomicSMin32:
710 case IR::Opcode::ImageAtomicUMin32:
711 case IR::Opcode::ImageAtomicSMax32:
712 case IR::Opcode::ImageAtomicUMax32:
713 case IR::Opcode::ImageAtomicInc32:
714 case IR::Opcode::ImageAtomicDec32:
715 case IR::Opcode::ImageAtomicAnd32:
716 case IR::Opcode::ImageAtomicOr32:
717 case IR::Opcode::ImageAtomicXor32:
718 case IR::Opcode::ImageAtomicExchange32:
719 info.uses_atomic_image_u32 = true;
720 break;
721 default:
722 break;
723 }
724}
725
726void VisitFpModifiers(Info& info, IR::Inst& inst) {
727 switch (inst.GetOpcode()) {
728 case IR::Opcode::FPAdd16:
729 case IR::Opcode::FPFma16:
730 case IR::Opcode::FPMul16:
731 case IR::Opcode::FPRoundEven16:
732 case IR::Opcode::FPFloor16:
733 case IR::Opcode::FPCeil16:
734 case IR::Opcode::FPTrunc16: {
735 const auto control{inst.Flags<IR::FpControl>()};
736 switch (control.fmz_mode) {
737 case IR::FmzMode::DontCare:
738 break;
739 case IR::FmzMode::FTZ:
740 case IR::FmzMode::FMZ:
741 info.uses_fp16_denorms_flush = true;
742 break;
743 case IR::FmzMode::None:
744 info.uses_fp16_denorms_preserve = true;
745 break;
746 }
747 break;
748 }
749 case IR::Opcode::FPAdd32:
750 case IR::Opcode::FPFma32:
751 case IR::Opcode::FPMul32:
752 case IR::Opcode::FPRoundEven32:
753 case IR::Opcode::FPFloor32:
754 case IR::Opcode::FPCeil32:
755 case IR::Opcode::FPTrunc32:
756 case IR::Opcode::FPOrdEqual32:
757 case IR::Opcode::FPUnordEqual32:
758 case IR::Opcode::FPOrdNotEqual32:
759 case IR::Opcode::FPUnordNotEqual32:
760 case IR::Opcode::FPOrdLessThan32:
761 case IR::Opcode::FPUnordLessThan32:
762 case IR::Opcode::FPOrdGreaterThan32:
763 case IR::Opcode::FPUnordGreaterThan32:
764 case IR::Opcode::FPOrdLessThanEqual32:
765 case IR::Opcode::FPUnordLessThanEqual32:
766 case IR::Opcode::FPOrdGreaterThanEqual32:
767 case IR::Opcode::FPUnordGreaterThanEqual32:
768 case IR::Opcode::ConvertF16F32:
769 case IR::Opcode::ConvertF64F32: {
770 const auto control{inst.Flags<IR::FpControl>()};
771 switch (control.fmz_mode) {
772 case IR::FmzMode::DontCare:
773 break;
774 case IR::FmzMode::FTZ:
775 case IR::FmzMode::FMZ:
776 info.uses_fp32_denorms_flush = true;
777 break;
778 case IR::FmzMode::None:
779 info.uses_fp32_denorms_preserve = true;
780 break;
781 }
782 break;
783 }
784 default:
785 break;
786 }
787}
788
789void VisitCbufs(Info& info, IR::Inst& inst) {
790 switch (inst.GetOpcode()) {
791 case IR::Opcode::GetCbufU8:
792 case IR::Opcode::GetCbufS8:
793 case IR::Opcode::GetCbufU16:
794 case IR::Opcode::GetCbufS16:
795 case IR::Opcode::GetCbufU32:
796 case IR::Opcode::GetCbufF32:
797 case IR::Opcode::GetCbufU32x2: {
798 CheckCBufNVN(info, inst);
799 break;
800 }
801 default:
802 break;
803 }
804}
805
806void Visit(Info& info, IR::Inst& inst) {
807 VisitUsages(info, inst);
808 VisitFpModifiers(info, inst);
809 VisitCbufs(info, inst);
810}
811
812void GatherInfoFromHeader(Environment& env, Info& info) {
813 Stage stage{env.ShaderStage()};
814 if (stage == Stage::Compute) {
815 return;
816 }
817 const auto& header{env.SPH()};
818 if (stage == Stage::Fragment) {
819 if (!info.loads_indexed_attributes) {
820 return;
821 }
822 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
823 const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
824 const auto vector{header.ps.imap_generic_vector[index]};
825 info.loads.mask[offset + 0] = vector.x != PixelImap::Unused;
826 info.loads.mask[offset + 1] = vector.y != PixelImap::Unused;
827 info.loads.mask[offset + 2] = vector.z != PixelImap::Unused;
828 info.loads.mask[offset + 3] = vector.w != PixelImap::Unused;
829 }
830 return;
831 }
832 if (info.loads_indexed_attributes) {
833 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
834 const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
835 const auto mask = header.vtg.InputGeneric(index);
836 for (size_t i = 0; i < 4; ++i) {
837 info.loads.Set(attribute + i, mask[i]);
838 }
839 }
840 for (size_t index = 0; index < 8; ++index) {
841 const u16 mask{header.vtg.clip_distances};
842 info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
843 }
844 info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0);
845 info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0);
846 info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0);
847 info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0);
848 info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0);
849 info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0);
850 info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0);
851 info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0);
852 info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0);
853 info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0);
854 info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0);
855 info.loads.Set(IR::Attribute::TessellationEvaluationPointU,
856 header.vtg.tessellation_eval_point_u != 0);
857 info.loads.Set(IR::Attribute::TessellationEvaluationPointV,
858 header.vtg.tessellation_eval_point_v != 0);
859 info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0);
860 info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0);
861 // TODO: Legacy varyings
862 }
863 if (info.stores_indexed_attributes) {
864 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
865 const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
866 const auto mask{header.vtg.OutputGeneric(index)};
867 for (size_t i = 0; i < 4; ++i) {
868 info.stores.Set(attribute + i, mask[i]);
869 }
870 }
871 for (size_t index = 0; index < 8; ++index) {
872 const u16 mask{header.vtg.omap_systemc.clip_distances};
873 info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
874 }
875 info.stores.Set(IR::Attribute::PrimitiveId,
876 header.vtg.omap_systemb.primitive_array_id != 0);
877 info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0);
878 info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0);
879 info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0);
880 info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0);
881 info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0);
882 info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0);
883 info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0);
884 info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0);
885 info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0);
886 info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0);
887 info.stores.Set(IR::Attribute::TessellationEvaluationPointU,
888 header.vtg.omap_systemc.tessellation_eval_point_u != 0);
889 info.stores.Set(IR::Attribute::TessellationEvaluationPointV,
890 header.vtg.omap_systemc.tessellation_eval_point_v != 0);
891 info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0);
892 info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0);
893 // TODO: Legacy varyings
894 }
895}
896} // Anonymous namespace
897
898void CollectShaderInfoPass(Environment& env, IR::Program& program) {
899 Info& info{program.info};
900 const u32 base{[&] {
901 switch (program.stage) {
902 case Stage::VertexA:
903 case Stage::VertexB:
904 return 0x110u;
905 case Stage::TessellationControl:
906 return 0x210u;
907 case Stage::TessellationEval:
908 return 0x310u;
909 case Stage::Geometry:
910 return 0x410u;
911 case Stage::Fragment:
912 return 0x510u;
913 case Stage::Compute:
914 return 0x310u;
915 }
916 throw InvalidArgument("Invalid stage {}", program.stage);
917 }()};
918 info.nvn_buffer_base = base;
919
920 for (IR::Block* const block : program.post_order_blocks) {
921 for (IR::Inst& inst : block->Instructions()) {
922 Visit(info, inst);
923 }
924 }
925 GatherInfoFromHeader(env, info);
926}
927
928} // namespace Shader::Optimization
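
CheckCBufNVN performs the inverse mapping of AddNVNStorageBuffers: a constant-buffer-0 access whose immediate offset lands inside the 16-entry descriptor window marks the matching nvn_buffer_used bit. A sketch of that slot computation under the same layout assumptions; the helper name is illustrative.

#include <cstddef>
#include <cstdint>
#include <optional>

constexpr std::optional<std::size_t> NvnSlotFromOffset(std::uint32_t base, std::uint32_t offset) {
    constexpr std::uint32_t descriptor_size = 0x10;
    constexpr std::uint32_t num_buffers = 16;
    if (offset < base || offset >= base + descriptor_size * num_buffers) {
        return std::nullopt;  // outside the descriptor window: nothing is marked
    }
    return (offset - base) / descriptor_size;
}

static_assert(NvnSlotFromOffset(0x110, 0x110) == std::size_t{0});   // vertex base, first slot
static_assert(NvnSlotFromOffset(0x110, 0x1f0) == std::size_t{14});  // (0x1f0 - 0x110) / 0x10
static_assert(!NvnSlotFromOffset(0x110, 0x100));                    // below the window
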
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..8dd6d6c2c
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,610 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7#include <type_traits>
8
9#include "common/bit_cast.h"
10#include "common/bit_util.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/ir_opt/passes.h"
15
16namespace Shader::Optimization {
17namespace {
18// Metaprogramming helpers to extract argument information from a lambda
19template <typename Func>
20struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
21
22template <typename ReturnType, typename LambdaType, typename... Args>
23struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
24 template <size_t I>
25 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
26
27 static constexpr size_t NUM_ARGS{sizeof...(Args)};
28};
29
30template <typename T>
31[[nodiscard]] T Arg(const IR::Value& value) {
32 if constexpr (std::is_same_v<T, bool>) {
33 return value.U1();
34 } else if constexpr (std::is_same_v<T, u32>) {
35 return value.U32();
36 } else if constexpr (std::is_same_v<T, s32>) {
37 return static_cast<s32>(value.U32());
38 } else if constexpr (std::is_same_v<T, f32>) {
39 return value.F32();
40 } else if constexpr (std::is_same_v<T, u64>) {
41 return value.U64();
42 }
43}
44
45template <typename T, typename ImmFn>
46bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
47 const IR::Value lhs{inst.Arg(0)};
48 const IR::Value rhs{inst.Arg(1)};
49
50 const bool is_lhs_immediate{lhs.IsImmediate()};
51 const bool is_rhs_immediate{rhs.IsImmediate()};
52
53 if (is_lhs_immediate && is_rhs_immediate) {
54 const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
55 inst.ReplaceUsesWith(IR::Value{result});
56 return false;
57 }
58 if (is_lhs_immediate && !is_rhs_immediate) {
59 IR::Inst* const rhs_inst{rhs.InstRecursive()};
60 if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
61 const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
62 inst.SetArg(0, rhs_inst->Arg(0));
63 inst.SetArg(1, IR::Value{combined});
64 } else {
65 // Normalize
66 inst.SetArg(0, rhs);
67 inst.SetArg(1, lhs);
68 }
69 }
70 if (!is_lhs_immediate && is_rhs_immediate) {
71 const IR::Inst* const lhs_inst{lhs.InstRecursive()};
72 if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
73 const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
74 inst.SetArg(0, lhs_inst->Arg(0));
75 inst.SetArg(1, IR::Value{combined});
76 }
77 }
78 return true;
79}
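FoldCommutative leans on the operation being associative and commutative: a constant buried in a nested instruction of the same opcode can be merged with the outer constant, and a lone constant is normalized onto the right-hand side. A scalar sketch of the IAdd32 case with illustrative names; unsigned 32-bit wrap-around makes the rewrite exact:

// (x + c1) + c2 equals x + (c1 + c2) for u32, which is what allows both constants to be
// collapsed into a single immediate operand.
constexpr u32 OuterThenInner(u32 x, u32 c1, u32 c2) {
    return (x + c1) + c2;
}
constexpr u32 CombinedImmediate(u32 x, u32 c1, u32 c2) {
    return x + (c1 + c2);
}
static_assert(OuterThenInner(0xfffffff0u, 0x20u, 0x30u) ==
              CombinedImmediate(0xfffffff0u, 0x20u, 0x30u));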
80
81template <typename Func>
82bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
83 if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
84 return false;
85 }
86 using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
87 inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
88 return true;
89}
90
91void FoldGetRegister(IR::Inst& inst) {
92 if (inst.Arg(0).Reg() == IR::Reg::RZ) {
93 inst.ReplaceUsesWith(IR::Value{u32{0}});
94 }
95}
96
97void FoldGetPred(IR::Inst& inst) {
98 if (inst.Arg(0).Pred() == IR::Pred::PT) {
99 inst.ReplaceUsesWith(IR::Value{true});
100 }
101}
102
103/// Replaces the pattern generated by two XMAD multiplications
104bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
105 /*
106 * We are looking for this pattern:
107 * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
108 * %rhs_mul = IMul32 %rhs_bfe, %factor_b
109 * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
110 * %lhs_mul = IMul32 %lhs_bfe, %factor_b
111 * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
112 * %result = IAdd32 %lhs_shl, %rhs_mul
113 *
114 * And replacing it with
115 * %result = IMul32 %factor_a, %factor_b
116 *
117 * LLVM and MSVC perform this same transformation, so it is assumed to be safe.
118 */
119 const IR::Value lhs_arg{inst.Arg(0)};
120 const IR::Value rhs_arg{inst.Arg(1)};
121 if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
122 return false;
123 }
124 IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
125 if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
126 lhs_shl->Arg(1) != IR::Value{16U}) {
127 return false;
128 }
129 if (lhs_shl->Arg(0).IsImmediate()) {
130 return false;
131 }
132 IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
133 IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
134 if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
135 return false;
136 }
137 if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
138 return false;
139 }
140 const IR::U32 factor_b{lhs_mul->Arg(1)};
141 if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
142 return false;
143 }
144 IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
145 IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
146 if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
147 return false;
148 }
149 if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
150 return false;
151 }
152 if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
153 return false;
154 }
155 if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
156 return false;
157 }
158 if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
159 return false;
160 }
161 const IR::U32 factor_a{lhs_bfe->Arg(0)};
162 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
163 inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
164 return true;
165}
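The rewrite above is justified by the identity (a & 0xffff) * b + (((a >> 16) * b) << 16) == a * b under 32-bit wrap-around arithmetic. A small self-contained check of that identity, assuming the codebase's u32 alias (illustrative only):

// Verifies that the split 16x16 multiplies recombine into a full 32-bit multiply (mod 2^32).
constexpr bool XmadSplitMatches(u32 a, u32 b) {
    const u32 rhs_mul{(a & 0xffffu) * b};
    const u32 lhs_mul{((a >> 16) & 0xffffu) * b};
    return (lhs_mul << 16) + rhs_mul == a * b;
}
static_assert(XmadSplitMatches(0xdeadbeefu, 0x12345678u));
static_assert(XmadSplitMatches(0xffffffffu, 0xffffffffu));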
166
167template <typename T>
168void FoldAdd(IR::Block& block, IR::Inst& inst) {
169 if (inst.HasAssociatedPseudoOperation()) {
170 return;
171 }
172 if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
173 return;
174 }
175 const IR::Value rhs{inst.Arg(1)};
176 if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
177 inst.ReplaceUsesWith(inst.Arg(0));
178 return;
179 }
180 if constexpr (std::is_same_v<T, u32>) {
181 if (FoldXmadMultiply(block, inst)) {
182 return;
183 }
184 }
185}
186
187void FoldISub32(IR::Inst& inst) {
188 if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
189 return;
190 }
191 if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
192 return;
193 }
194 // ISub32 is generally used to subtract two constant buffer reads; compare them and
195 // replace the result with zero if they are equal.
196 const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
197 return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
198 b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
199 a->Arg(1) == b->Arg(1);
200 }};
201 IR::Inst* op_a{inst.Arg(0).InstRecursive()};
202 IR::Inst* op_b{inst.Arg(1).InstRecursive()};
203 if (equal_cbuf(op_a, op_b)) {
204 inst.ReplaceUsesWith(IR::Value{u32{0}});
205 return;
206 }
207 // It's also possible a value is being added to a cbuf and then subtracted
208 if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
209 // Canonicalize local variables to simplify the following logic
210 std::swap(op_a, op_b);
211 }
212 if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
213 return;
214 }
215 IR::Inst* const inst_cbuf{op_b};
216 if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
217 return;
218 }
219 IR::Value add_op_a{op_a->Arg(0)};
220 IR::Value add_op_b{op_a->Arg(1)};
221 if (add_op_b.IsImmediate()) {
222 // Canonicalize
223 std::swap(add_op_a, add_op_b);
224 }
225 if (add_op_b.IsImmediate()) {
226 return;
227 }
228 IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
229 if (equal_cbuf(add_cbuf, inst_cbuf)) {
230 inst.ReplaceUsesWith(add_op_a);
231 }
232}
233
234void FoldSelect(IR::Inst& inst) {
235 const IR::Value cond{inst.Arg(0)};
236 if (cond.IsImmediate()) {
237 inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
238 }
239}
240
241void FoldFPMul32(IR::Inst& inst) {
242 const auto control{inst.Flags<IR::FpControl>()};
243 if (control.no_contraction) {
244 return;
245 }
246 // Fold interpolation operations
247 const IR::Value lhs_value{inst.Arg(0)};
248 const IR::Value rhs_value{inst.Arg(1)};
249 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
250 return;
251 }
252 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
253 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
254 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
255 rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
256 return;
257 }
258 const IR::Value recip_source{rhs_op->Arg(0)};
259 const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()};
260 if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) {
261 return;
262 }
263 IR::Inst* const attr_a{recip_source.InstRecursive()};
264 IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
265 if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
266 attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
267 return;
268 }
269 if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
270 inst.ReplaceUsesWith(lhs_op->Arg(0));
271 }
272}
273
274void FoldLogicalAnd(IR::Inst& inst) {
275 if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
276 return;
277 }
278 const IR::Value rhs{inst.Arg(1)};
279 if (rhs.IsImmediate()) {
280 if (rhs.U1()) {
281 inst.ReplaceUsesWith(inst.Arg(0));
282 } else {
283 inst.ReplaceUsesWith(IR::Value{false});
284 }
285 }
286}
287
288void FoldLogicalOr(IR::Inst& inst) {
289 if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
290 return;
291 }
292 const IR::Value rhs{inst.Arg(1)};
293 if (rhs.IsImmediate()) {
294 if (rhs.U1()) {
295 inst.ReplaceUsesWith(IR::Value{true});
296 } else {
297 inst.ReplaceUsesWith(inst.Arg(0));
298 }
299 }
300}
301
302void FoldLogicalNot(IR::Inst& inst) {
303 const IR::U1 value{inst.Arg(0)};
304 if (value.IsImmediate()) {
305 inst.ReplaceUsesWith(IR::Value{!value.U1()});
306 return;
307 }
308 IR::Inst* const arg{value.InstRecursive()};
309 if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
310 inst.ReplaceUsesWith(arg->Arg(0));
311 }
312}
313
314template <IR::Opcode op, typename Dest, typename Source>
315void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
316 const IR::Value value{inst.Arg(0)};
317 if (value.IsImmediate()) {
318 inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
319 return;
320 }
321 IR::Inst* const arg_inst{value.InstRecursive()};
322 if (arg_inst->GetOpcode() == reverse) {
323 inst.ReplaceUsesWith(arg_inst->Arg(0));
324 return;
325 }
326 if constexpr (op == IR::Opcode::BitCastF32U32) {
327 if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
328 // Replace the bitcast with a typed constant buffer read
329 inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
330 inst.SetArg(0, arg_inst->Arg(0));
331 inst.SetArg(1, arg_inst->Arg(1));
332 return;
333 }
334 }
335}
336
337void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
338 const IR::Value value{inst.Arg(0)};
339 if (value.IsImmediate()) {
340 return;
341 }
342 IR::Inst* const arg_inst{value.InstRecursive()};
343 if (arg_inst->GetOpcode() == reverse) {
344 inst.ReplaceUsesWith(arg_inst->Arg(0));
345 return;
346 }
347}
348
349template <typename Func, size_t... I>
350IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
351 using Traits = LambdaTraits<decltype(func)>;
352 return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
353}
354
355std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
356 IR::Opcode construct, u32 first_index) {
357 IR::Inst* const inst{inst_value.InstRecursive()};
358 if (inst->GetOpcode() == construct) {
359 return inst->Arg(first_index);
360 }
361 if (inst->GetOpcode() != insert) {
362 return std::nullopt;
363 }
364 IR::Value value_index{inst->Arg(2)};
365 if (!value_index.IsImmediate()) {
366 return std::nullopt;
367 }
368 const u32 second_index{value_index.U32()};
369 if (first_index != second_index) {
370 IR::Value value_composite{inst->Arg(0)};
371 if (value_composite.IsImmediate()) {
372 return std::nullopt;
373 }
374 return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
375 }
376 return inst->Arg(1);
377}
378
379void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
380 const IR::Value value_1{inst.Arg(0)};
381 const IR::Value value_2{inst.Arg(1)};
382 if (value_1.IsImmediate()) {
383 return;
384 }
385 if (!value_2.IsImmediate()) {
386 return;
387 }
388 const u32 first_index{value_2.U32()};
389 const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
390 if (!result) {
391 return;
392 }
393 inst.ReplaceUsesWith(*result);
394}
395
396IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) {
397 if (value.IsImmediate()) {
398 return value;
399 }
400 IR::Inst* const inst{value.InstRecursive()};
401 if (inst->GetOpcode() == expected_cast) {
402 return inst->Arg(0).Resolve();
403 }
404 return value;
405}
406
407void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
408 const IR::Value swizzle{inst.Arg(2)};
409 if (!swizzle.IsImmediate()) {
410 return;
411 }
412 const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)};
413 const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)};
414 if (value_1.IsImmediate()) {
415 return;
416 }
417 const u32 swizzle_value{swizzle.U32()};
418 if (swizzle_value != 0x99 && swizzle_value != 0xA5) {
419 return;
420 }
421 IR::Inst* const inst2{value_1.InstRecursive()};
422 if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) {
423 return;
424 }
425 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
426 if (value_2 != value_3) {
427 return;
428 }
429 const IR::Value index{inst2->Arg(1)};
430 const IR::Value clamp{inst2->Arg(2)};
431 const IR::Value segmentation_mask{inst2->Arg(3)};
432 if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
433 return;
434 }
435 if (clamp.U32() != 3 || segmentation_mask.U32() != 28) {
436 return;
437 }
438 if (swizzle_value == 0x99) {
439 // DPdxFine
440 if (index.U32() == 1) {
441 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
442 inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)}));
443 }
444 } else if (swizzle_value == 0xA5) {
445 // DPdyFine
446 if (index.U32() == 2) {
447 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
448 inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)}));
449 }
450 }
451}
452
453void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
454 switch (inst.GetOpcode()) {
455 case IR::Opcode::GetRegister:
456 return FoldGetRegister(inst);
457 case IR::Opcode::GetPred:
458 return FoldGetPred(inst);
459 case IR::Opcode::IAdd32:
460 return FoldAdd<u32>(block, inst);
461 case IR::Opcode::ISub32:
462 return FoldISub32(inst);
463 case IR::Opcode::IMul32:
464 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
465 return;
466 case IR::Opcode::ShiftRightArithmetic32:
467 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
468 return;
469 case IR::Opcode::BitCastF32U32:
470 return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
471 case IR::Opcode::BitCastU32F32:
472 return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
473 case IR::Opcode::IAdd64:
474 return FoldAdd<u64>(block, inst);
475 case IR::Opcode::PackHalf2x16:
476 return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
477 case IR::Opcode::UnpackHalf2x16:
478 return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
479 case IR::Opcode::SelectU1:
480 case IR::Opcode::SelectU8:
481 case IR::Opcode::SelectU16:
482 case IR::Opcode::SelectU32:
483 case IR::Opcode::SelectU64:
484 case IR::Opcode::SelectF16:
485 case IR::Opcode::SelectF32:
486 case IR::Opcode::SelectF64:
487 return FoldSelect(inst);
488 case IR::Opcode::FPMul32:
489 return FoldFPMul32(inst);
490 case IR::Opcode::LogicalAnd:
491 return FoldLogicalAnd(inst);
492 case IR::Opcode::LogicalOr:
493 return FoldLogicalOr(inst);
494 case IR::Opcode::LogicalNot:
495 return FoldLogicalNot(inst);
496 case IR::Opcode::SLessThan:
497 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
498 return;
499 case IR::Opcode::ULessThan:
500 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
501 return;
502 case IR::Opcode::SLessThanEqual:
503 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
504 return;
505 case IR::Opcode::ULessThanEqual:
506 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
507 return;
508 case IR::Opcode::SGreaterThan:
509 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
510 return;
511 case IR::Opcode::UGreaterThan:
512 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
513 return;
514 case IR::Opcode::SGreaterThanEqual:
515 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
516 return;
517 case IR::Opcode::UGreaterThanEqual:
518 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
519 return;
520 case IR::Opcode::IEqual:
521 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
522 return;
523 case IR::Opcode::INotEqual:
524 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
525 return;
526 case IR::Opcode::BitwiseAnd32:
527 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
528 return;
529 case IR::Opcode::BitwiseOr32:
530 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
531 return;
532 case IR::Opcode::BitwiseXor32:
533 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
534 return;
535 case IR::Opcode::BitFieldUExtract:
536 FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
537 if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
538 throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
539 base, shift, count);
540 }
541 return (base >> shift) & ((1U << count) - 1);
542 });
543 return;
544 case IR::Opcode::BitFieldSExtract:
545 FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
546 const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
547 const size_t left_shift{32 - back_shift};
548 const size_t right_shift{static_cast<size_t>(32 - count)};
549 if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
550 throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
551 base, shift, count);
552 }
553 return static_cast<u32>((base << left_shift) >> right_shift);
554 });
555 return;
556 case IR::Opcode::BitFieldInsert:
557 FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
558 if (bits >= 32 || offset >= 32) {
559 throw LogicError("Undefined result in {}({}, {}, {}, {})",
560 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
561 }
562 return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
563 });
564 return;
565 case IR::Opcode::CompositeExtractU32x2:
566 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
567 IR::Opcode::CompositeInsertU32x2);
568 case IR::Opcode::CompositeExtractU32x3:
569 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
570 IR::Opcode::CompositeInsertU32x3);
571 case IR::Opcode::CompositeExtractU32x4:
572 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
573 IR::Opcode::CompositeInsertU32x4);
574 case IR::Opcode::CompositeExtractF32x2:
575 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
576 IR::Opcode::CompositeInsertF32x2);
577 case IR::Opcode::CompositeExtractF32x3:
578 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
579 IR::Opcode::CompositeInsertF32x3);
580 case IR::Opcode::CompositeExtractF32x4:
581 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
582 IR::Opcode::CompositeInsertF32x4);
583 case IR::Opcode::CompositeExtractF16x2:
584 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
585 IR::Opcode::CompositeInsertF16x2);
586 case IR::Opcode::CompositeExtractF16x3:
587 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
588 IR::Opcode::CompositeInsertF16x3);
589 case IR::Opcode::CompositeExtractF16x4:
590 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
591 IR::Opcode::CompositeInsertF16x4);
592 case IR::Opcode::FSwizzleAdd:
593 return FoldFSwizzleAdd(block, inst);
594 default:
595 break;
596 }
597}
598} // Anonymous namespace
599
600void ConstantPropagationPass(IR::Program& program) {
601 const auto end{program.post_order_blocks.rend()};
602 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
603 IR::Block* const block{*it};
604 for (IR::Inst& inst : block->Instructions()) {
605 ConstantPropagation(*block, inst);
606 }
607 }
608}
609
610} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..400836301
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/basic_block.h"
6#include "shader_recompiler/frontend/ir/value.h"
7#include "shader_recompiler/ir_opt/passes.h"
8
9namespace Shader::Optimization {
10
11void DeadCodeEliminationPass(IR::Program& program) {
12 // We iterate over the instructions in reverse order.
13 // This is because removing an instruction reduces the number of uses for earlier instructions.
14 for (IR::Block* const block : program.post_order_blocks) {
15 auto it{block->end()};
16 while (it != block->begin()) {
17 --it;
18 if (!it->HasUses() && !it->MayHaveSideEffects()) {
19 it->Invalidate();
20 it = block->Instructions().erase(it);
21 }
22 }
23 }
24}
25
26} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
new file mode 100644
index 000000000..055ba9c54
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/ir_emitter.h"
6#include "shader_recompiler/ir_opt/passes.h"
7
8namespace Shader::Optimization {
9
10void VertexATransformPass(IR::Program& program) {
11 for (IR::Block* const block : program.blocks) {
12 for (IR::Inst& inst : block->Instructions()) {
13 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
14 return inst.Invalidate();
15 }
16 }
17 }
18}
19
20void VertexBTransformPass(IR::Program& program) {
21 for (IR::Block* const block : program.blocks) {
22 for (IR::Inst& inst : block->Instructions()) {
23 if (inst.GetOpcode() == IR::Opcode::Prologue) {
24 return inst.Invalidate();
25 }
26 }
27 }
28}
29
30} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..4197b0095
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,526 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <compare>
7#include <optional>
8#include <queue>
9
10#include <boost/container/flat_set.hpp>
11#include <boost/container/small_vector.hpp>
12
13#include "common/alignment.h"
14#include "shader_recompiler/frontend/ir/basic_block.h"
15#include "shader_recompiler/frontend/ir/breadth_first_search.h"
16#include "shader_recompiler/frontend/ir/ir_emitter.h"
17#include "shader_recompiler/frontend/ir/value.h"
18#include "shader_recompiler/ir_opt/passes.h"
19
20namespace Shader::Optimization {
21namespace {
22/// Address in constant buffers to the storage buffer descriptor
23struct StorageBufferAddr {
24 auto operator<=>(const StorageBufferAddr&) const noexcept = default;
25
26 u32 index;
27 u32 offset;
28};
29
30/// Block iterator to a global memory instruction and the storage buffer it uses
31struct StorageInst {
32 StorageBufferAddr storage_buffer;
33 IR::Inst* inst;
34 IR::Block* block;
35};
36
37/// Bias towards a certain range of constant buffers when looking for storage buffers
38struct Bias {
39 u32 index;
40 u32 offset_begin;
41 u32 offset_end;
42};
43
44using boost::container::flat_set;
45using boost::container::small_vector;
46using StorageBufferSet =
47 flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
48using StorageInstVector = small_vector<StorageInst, 24>;
49using StorageWritesSet =
50 flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
51
52struct StorageInfo {
53 StorageBufferSet set;
54 StorageInstVector to_replace;
55 StorageWritesSet writes;
56};
57
58/// Returns true when the instruction is a global memory instruction
59bool IsGlobalMemory(const IR::Inst& inst) {
60 switch (inst.GetOpcode()) {
61 case IR::Opcode::LoadGlobalS8:
62 case IR::Opcode::LoadGlobalU8:
63 case IR::Opcode::LoadGlobalS16:
64 case IR::Opcode::LoadGlobalU16:
65 case IR::Opcode::LoadGlobal32:
66 case IR::Opcode::LoadGlobal64:
67 case IR::Opcode::LoadGlobal128:
68 case IR::Opcode::WriteGlobalS8:
69 case IR::Opcode::WriteGlobalU8:
70 case IR::Opcode::WriteGlobalS16:
71 case IR::Opcode::WriteGlobalU16:
72 case IR::Opcode::WriteGlobal32:
73 case IR::Opcode::WriteGlobal64:
74 case IR::Opcode::WriteGlobal128:
75 case IR::Opcode::GlobalAtomicIAdd32:
76 case IR::Opcode::GlobalAtomicSMin32:
77 case IR::Opcode::GlobalAtomicUMin32:
78 case IR::Opcode::GlobalAtomicSMax32:
79 case IR::Opcode::GlobalAtomicUMax32:
80 case IR::Opcode::GlobalAtomicInc32:
81 case IR::Opcode::GlobalAtomicDec32:
82 case IR::Opcode::GlobalAtomicAnd32:
83 case IR::Opcode::GlobalAtomicOr32:
84 case IR::Opcode::GlobalAtomicXor32:
85 case IR::Opcode::GlobalAtomicExchange32:
86 case IR::Opcode::GlobalAtomicIAdd64:
87 case IR::Opcode::GlobalAtomicSMin64:
88 case IR::Opcode::GlobalAtomicUMin64:
89 case IR::Opcode::GlobalAtomicSMax64:
90 case IR::Opcode::GlobalAtomicUMax64:
91 case IR::Opcode::GlobalAtomicAnd64:
92 case IR::Opcode::GlobalAtomicOr64:
93 case IR::Opcode::GlobalAtomicXor64:
94 case IR::Opcode::GlobalAtomicExchange64:
95 case IR::Opcode::GlobalAtomicAddF32:
96 case IR::Opcode::GlobalAtomicAddF16x2:
97 case IR::Opcode::GlobalAtomicAddF32x2:
98 case IR::Opcode::GlobalAtomicMinF16x2:
99 case IR::Opcode::GlobalAtomicMinF32x2:
100 case IR::Opcode::GlobalAtomicMaxF16x2:
101 case IR::Opcode::GlobalAtomicMaxF32x2:
102 return true;
103 default:
104 return false;
105 }
106}
107
108/// Returns true when the instruction writes to global memory (stores and atomics)
109bool IsGlobalMemoryWrite(const IR::Inst& inst) {
110 switch (inst.GetOpcode()) {
111 case IR::Opcode::WriteGlobalS8:
112 case IR::Opcode::WriteGlobalU8:
113 case IR::Opcode::WriteGlobalS16:
114 case IR::Opcode::WriteGlobalU16:
115 case IR::Opcode::WriteGlobal32:
116 case IR::Opcode::WriteGlobal64:
117 case IR::Opcode::WriteGlobal128:
118 case IR::Opcode::GlobalAtomicIAdd32:
119 case IR::Opcode::GlobalAtomicSMin32:
120 case IR::Opcode::GlobalAtomicUMin32:
121 case IR::Opcode::GlobalAtomicSMax32:
122 case IR::Opcode::GlobalAtomicUMax32:
123 case IR::Opcode::GlobalAtomicInc32:
124 case IR::Opcode::GlobalAtomicDec32:
125 case IR::Opcode::GlobalAtomicAnd32:
126 case IR::Opcode::GlobalAtomicOr32:
127 case IR::Opcode::GlobalAtomicXor32:
128 case IR::Opcode::GlobalAtomicExchange32:
129 case IR::Opcode::GlobalAtomicIAdd64:
130 case IR::Opcode::GlobalAtomicSMin64:
131 case IR::Opcode::GlobalAtomicUMin64:
132 case IR::Opcode::GlobalAtomicSMax64:
133 case IR::Opcode::GlobalAtomicUMax64:
134 case IR::Opcode::GlobalAtomicAnd64:
135 case IR::Opcode::GlobalAtomicOr64:
136 case IR::Opcode::GlobalAtomicXor64:
137 case IR::Opcode::GlobalAtomicExchange64:
138 case IR::Opcode::GlobalAtomicAddF32:
139 case IR::Opcode::GlobalAtomicAddF16x2:
140 case IR::Opcode::GlobalAtomicAddF32x2:
141 case IR::Opcode::GlobalAtomicMinF16x2:
142 case IR::Opcode::GlobalAtomicMinF32x2:
143 case IR::Opcode::GlobalAtomicMaxF16x2:
144 case IR::Opcode::GlobalAtomicMaxF32x2:
145 return true;
146 default:
147 return false;
148 }
149}
150
151/// Converts a global memory opcode to its storage buffer equivalent
152IR::Opcode GlobalToStorage(IR::Opcode opcode) {
153 switch (opcode) {
154 case IR::Opcode::LoadGlobalS8:
155 return IR::Opcode::LoadStorageS8;
156 case IR::Opcode::LoadGlobalU8:
157 return IR::Opcode::LoadStorageU8;
158 case IR::Opcode::LoadGlobalS16:
159 return IR::Opcode::LoadStorageS16;
160 case IR::Opcode::LoadGlobalU16:
161 return IR::Opcode::LoadStorageU16;
162 case IR::Opcode::LoadGlobal32:
163 return IR::Opcode::LoadStorage32;
164 case IR::Opcode::LoadGlobal64:
165 return IR::Opcode::LoadStorage64;
166 case IR::Opcode::LoadGlobal128:
167 return IR::Opcode::LoadStorage128;
168 case IR::Opcode::WriteGlobalS8:
169 return IR::Opcode::WriteStorageS8;
170 case IR::Opcode::WriteGlobalU8:
171 return IR::Opcode::WriteStorageU8;
172 case IR::Opcode::WriteGlobalS16:
173 return IR::Opcode::WriteStorageS16;
174 case IR::Opcode::WriteGlobalU16:
175 return IR::Opcode::WriteStorageU16;
176 case IR::Opcode::WriteGlobal32:
177 return IR::Opcode::WriteStorage32;
178 case IR::Opcode::WriteGlobal64:
179 return IR::Opcode::WriteStorage64;
180 case IR::Opcode::WriteGlobal128:
181 return IR::Opcode::WriteStorage128;
182 case IR::Opcode::GlobalAtomicIAdd32:
183 return IR::Opcode::StorageAtomicIAdd32;
184 case IR::Opcode::GlobalAtomicSMin32:
185 return IR::Opcode::StorageAtomicSMin32;
186 case IR::Opcode::GlobalAtomicUMin32:
187 return IR::Opcode::StorageAtomicUMin32;
188 case IR::Opcode::GlobalAtomicSMax32:
189 return IR::Opcode::StorageAtomicSMax32;
190 case IR::Opcode::GlobalAtomicUMax32:
191 return IR::Opcode::StorageAtomicUMax32;
192 case IR::Opcode::GlobalAtomicInc32:
193 return IR::Opcode::StorageAtomicInc32;
194 case IR::Opcode::GlobalAtomicDec32:
195 return IR::Opcode::StorageAtomicDec32;
196 case IR::Opcode::GlobalAtomicAnd32:
197 return IR::Opcode::StorageAtomicAnd32;
198 case IR::Opcode::GlobalAtomicOr32:
199 return IR::Opcode::StorageAtomicOr32;
200 case IR::Opcode::GlobalAtomicXor32:
201 return IR::Opcode::StorageAtomicXor32;
202 case IR::Opcode::GlobalAtomicIAdd64:
203 return IR::Opcode::StorageAtomicIAdd64;
204 case IR::Opcode::GlobalAtomicSMin64:
205 return IR::Opcode::StorageAtomicSMin64;
206 case IR::Opcode::GlobalAtomicUMin64:
207 return IR::Opcode::StorageAtomicUMin64;
208 case IR::Opcode::GlobalAtomicSMax64:
209 return IR::Opcode::StorageAtomicSMax64;
210 case IR::Opcode::GlobalAtomicUMax64:
211 return IR::Opcode::StorageAtomicUMax64;
212 case IR::Opcode::GlobalAtomicAnd64:
213 return IR::Opcode::StorageAtomicAnd64;
214 case IR::Opcode::GlobalAtomicOr64:
215 return IR::Opcode::StorageAtomicOr64;
216 case IR::Opcode::GlobalAtomicXor64:
217 return IR::Opcode::StorageAtomicXor64;
218 case IR::Opcode::GlobalAtomicExchange32:
219 return IR::Opcode::StorageAtomicExchange32;
220 case IR::Opcode::GlobalAtomicExchange64:
221 return IR::Opcode::StorageAtomicExchange64;
222 case IR::Opcode::GlobalAtomicAddF32:
223 return IR::Opcode::StorageAtomicAddF32;
224 case IR::Opcode::GlobalAtomicAddF16x2:
225 return IR::Opcode::StorageAtomicAddF16x2;
226 case IR::Opcode::GlobalAtomicMinF16x2:
227 return IR::Opcode::StorageAtomicMinF16x2;
228 case IR::Opcode::GlobalAtomicMaxF16x2:
229 return IR::Opcode::StorageAtomicMaxF16x2;
230 case IR::Opcode::GlobalAtomicAddF32x2:
231 return IR::Opcode::StorageAtomicAddF32x2;
232 case IR::Opcode::GlobalAtomicMinF32x2:
233 return IR::Opcode::StorageAtomicMinF32x2;
234 case IR::Opcode::GlobalAtomicMaxF32x2:
235 return IR::Opcode::StorageAtomicMaxF32x2;
236 default:
237 throw InvalidArgument("Invalid global memory opcode {}", opcode);
238 }
239}
240
241/// Returns true when a storage buffer address satisfies a bias
242bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
243 return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
244 storage_buffer.offset < bias.offset_end;
245}
246
247struct LowAddrInfo {
248 IR::U32 value;
249 s32 imm_offset;
250};
251
252/// Tries to track the low 32 bits of a global memory instruction's address
253std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
254 // The first argument is the 64-bit GPU address accessed by the global memory instruction
255 const IR::Value addr{inst->Arg(0)};
256 if (addr.IsImmediate()) {
257 // Not much we can do if it's an immediate
258 return std::nullopt;
259 }
260 // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2
261 IR::Inst* addr_inst{addr.InstRecursive()};
262 s32 imm_offset{0};
263 if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
264 // If it's an IAdd64, get the immediate offset it applies and grab the address
265 // instruction. This expects the instruction to be canonicalized, with the address as the
266 // first argument and the immediate offset as the second.
267 const IR::U64 imm_offset_value{addr_inst->Arg(1)};
268 if (!imm_offset_value.IsImmediate()) {
269 return std::nullopt;
270 }
271 imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
272 const IR::U64 iadd_addr{addr_inst->Arg(0)};
273 if (iadd_addr.IsImmediate()) {
274 return std::nullopt;
275 }
276 addr_inst = iadd_addr.InstRecursive();
277 }
278 // With IAdd64 handled, now PackUint2x32 is expected
279 if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
280 // PackUint2x32 is expected to be generated from a vector
281 const IR::Value vector{addr_inst->Arg(0)};
282 if (vector.IsImmediate()) {
283 return std::nullopt;
284 }
285 addr_inst = vector.InstRecursive();
286 }
287 // The vector is expected to be a CompositeConstructU32x2
288 if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
289 return std::nullopt;
290 }
291 // Grab the first argument from the CompositeConstructU32x2; this is the low address.
292 return LowAddrInfo{
293 .value{IR::U32{addr_inst->Arg(0)}},
294 .imm_offset = imm_offset,
295 };
296}
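The chain being peeled apart corresponds to a 64-bit address assembled from two 32-bit halves plus an optional immediate; only the low half and the immediate are recovered. A scalar sketch of that construction, with illustrative names and the codebase's fixed-width aliases assumed:

// Scalar analogue of the IR shape TrackLowAddress walks:
//   IAdd64(PackUint2x32(CompositeConstructU32x2(lo, hi)), imm_offset)
constexpr u64 MakeGlobalAddress(u32 lo, u32 hi, s32 imm_offset) {
    const u64 packed{static_cast<u64>(lo) | (static_cast<u64>(hi) << 32)};
    return packed + static_cast<u64>(static_cast<s64>(imm_offset));
}
static_assert(MakeGlobalAddress(0x100u, 0x8000u, 0x20) == 0x0000'8000'0000'0120ULL);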
297
298/// Tries to track the storage buffer address used by a global memory instruction
299std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
300 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
301 if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
302 return std::nullopt;
303 }
304 const IR::Value index{inst->Arg(0)};
305 const IR::Value offset{inst->Arg(1)};
306 if (!index.IsImmediate()) {
307 // Definitely not a storage buffer if it's read from a
308 // non-immediate index
309 return std::nullopt;
310 }
311 if (!offset.IsImmediate()) {
312 // TODO: Support SSBO arrays
313 return std::nullopt;
314 }
315 const StorageBufferAddr storage_buffer{
316 .index = index.U32(),
317 .offset = offset.U32(),
318 };
319 if (!Common::IsAligned(storage_buffer.offset, 16)) {
320 // The SSBO pointer has to be aligned
321 return std::nullopt;
322 }
323 if (bias && !MeetsBias(storage_buffer, *bias)) {
324 // When a bias is provided, reject addresses outside of it so that unrelated
325 // constant buffer reads are not wrongly treated as storage buffer pointers
326 return std::nullopt;
327 }
328 return storage_buffer;
329 }};
330 return BreadthFirstSearch(value, pred);
331}
332
333/// Collects the storage buffer used by a global memory instruction and the instruction itself
334void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
335 // NVN puts storage buffers in a specific range; we have to bias towards these addresses to
336 // avoid getting false positives
337 static constexpr Bias nvn_bias{
338 .index = 0,
339 .offset_begin = 0x110,
340 .offset_end = 0x610,
341 };
342 // Track the low address of the instruction
343 const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
344 if (!low_addr_info) {
345 // Failed to track the low address, use NVN fallbacks
346 return;
347 }
348 // First try to find storage buffers in the NVN address
349 const IR::U32 low_addr{low_addr_info->value};
350 std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
351 if (!storage_buffer) {
352 // If it fails, track without a bias
353 storage_buffer = Track(low_addr, nullptr);
354 if (!storage_buffer) {
355 // If that also fails, use NVN fallbacks
356 return;
357 }
358 }
359 // Collect storage buffer and the instruction
360 if (IsGlobalMemoryWrite(inst)) {
361 info.writes.insert(*storage_buffer);
362 }
363 info.set.insert(*storage_buffer);
364 info.to_replace.push_back(StorageInst{
365 .storage_buffer{*storage_buffer},
366 .inst = &inst,
367 .block = &block,
368 });
369}
370
371/// Returns the byte offset for an equivalent storage buffer instruction
372IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
373 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
374 IR::U32 offset;
375 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
376 offset = low_addr->value;
377 if (low_addr->imm_offset != 0) {
378 offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
379 }
380 } else {
381 offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
382 }
383 // Subtract the storage buffer base address (the least significant 32 bits read from the
384 // constant buffer) from the guest address. The result is the storage buffer offset in bytes.
385 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
386 return ir.ISub(offset, low_cbuf);
387}
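In scalar terms the emitted offset is the low 32 bits of the guest address (plus any immediate) minus the storage buffer base address held in the constant buffer. A sketch with illustrative names:

// Byte offset into the storage buffer, mirroring the ISub emitted by StorageOffset.
constexpr u32 StorageByteOffset(u32 guest_addr_lo, s32 imm_offset, u32 ssbo_base_lo) {
    return (guest_addr_lo + static_cast<u32>(imm_offset)) - ssbo_base_lo;
}
static_assert(StorageByteOffset(0x1000u, 0x10, 0x0f00u) == 0x110u);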
388
389/// Replace a global memory load instruction with its storage buffer equivalent
390void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
391 const IR::U32& offset) {
392 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
393 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
394 const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
395 inst.ReplaceUsesWith(value);
396}
397
398/// Replace a global memory write instruction with its storage buffer equivalent
399void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
400 const IR::U32& offset) {
401 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
402 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
403 block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
404 inst.Invalidate();
405}
406
407/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
408void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
409 const IR::U32& offset) {
410 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
411 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
412 const IR::Value value{
413 &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
414 inst.ReplaceUsesWith(value);
415}
416
417/// Replace a global memory instruction with its storage buffer equivalent
418void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
419 const IR::U32& offset) {
420 switch (inst.GetOpcode()) {
421 case IR::Opcode::LoadGlobalS8:
422 case IR::Opcode::LoadGlobalU8:
423 case IR::Opcode::LoadGlobalS16:
424 case IR::Opcode::LoadGlobalU16:
425 case IR::Opcode::LoadGlobal32:
426 case IR::Opcode::LoadGlobal64:
427 case IR::Opcode::LoadGlobal128:
428 return ReplaceLoad(block, inst, storage_index, offset);
429 case IR::Opcode::WriteGlobalS8:
430 case IR::Opcode::WriteGlobalU8:
431 case IR::Opcode::WriteGlobalS16:
432 case IR::Opcode::WriteGlobalU16:
433 case IR::Opcode::WriteGlobal32:
434 case IR::Opcode::WriteGlobal64:
435 case IR::Opcode::WriteGlobal128:
436 return ReplaceWrite(block, inst, storage_index, offset);
437 case IR::Opcode::GlobalAtomicIAdd32:
438 case IR::Opcode::GlobalAtomicSMin32:
439 case IR::Opcode::GlobalAtomicUMin32:
440 case IR::Opcode::GlobalAtomicSMax32:
441 case IR::Opcode::GlobalAtomicUMax32:
442 case IR::Opcode::GlobalAtomicInc32:
443 case IR::Opcode::GlobalAtomicDec32:
444 case IR::Opcode::GlobalAtomicAnd32:
445 case IR::Opcode::GlobalAtomicOr32:
446 case IR::Opcode::GlobalAtomicXor32:
447 case IR::Opcode::GlobalAtomicExchange32:
448 case IR::Opcode::GlobalAtomicIAdd64:
449 case IR::Opcode::GlobalAtomicSMin64:
450 case IR::Opcode::GlobalAtomicUMin64:
451 case IR::Opcode::GlobalAtomicSMax64:
452 case IR::Opcode::GlobalAtomicUMax64:
453 case IR::Opcode::GlobalAtomicAnd64:
454 case IR::Opcode::GlobalAtomicOr64:
455 case IR::Opcode::GlobalAtomicXor64:
456 case IR::Opcode::GlobalAtomicExchange64:
457 case IR::Opcode::GlobalAtomicAddF32:
458 case IR::Opcode::GlobalAtomicAddF16x2:
459 case IR::Opcode::GlobalAtomicAddF32x2:
460 case IR::Opcode::GlobalAtomicMinF16x2:
461 case IR::Opcode::GlobalAtomicMinF32x2:
462 case IR::Opcode::GlobalAtomicMaxF16x2:
463 case IR::Opcode::GlobalAtomicMaxF32x2:
464 return ReplaceAtomic(block, inst, storage_index, offset);
465 default:
466 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
467 }
468}
469} // Anonymous namespace
470
471void GlobalMemoryToStorageBufferPass(IR::Program& program) {
472 StorageInfo info;
473 for (IR::Block* const block : program.post_order_blocks) {
474 for (IR::Inst& inst : block->Instructions()) {
475 if (!IsGlobalMemory(inst)) {
476 continue;
477 }
478 CollectStorageBuffers(*block, inst, info);
479 }
480 }
481 for (const StorageBufferAddr& storage_buffer : info.set) {
482 program.info.storage_buffers_descriptors.push_back({
483 .cbuf_index = storage_buffer.index,
484 .cbuf_offset = storage_buffer.offset,
485 .count = 1,
486 .is_written = info.writes.contains(storage_buffer),
487 });
488 }
489 for (const StorageInst& storage_inst : info.to_replace) {
490 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
491 const auto it{info.set.find(storage_inst.storage_buffer)};
492 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
493 IR::Block* const block{storage_inst.block};
494 IR::Inst* const inst{storage_inst.inst};
495 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
496 Replace(*block, *inst, index, offset);
497 }
498}
499
500template <typename Descriptors, typename Descriptor, typename Func>
501static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
502 // TODO: Handle arrays
503 const auto it{std::ranges::find_if(descriptors, pred)};
504 if (it != descriptors.end()) {
505 return static_cast<u32>(std::distance(descriptors.begin(), it));
506 }
507 descriptors.push_back(desc);
508 return static_cast<u32>(descriptors.size()) - 1;
509}
510
511void JoinStorageInfo(Info& base, Info& source) {
512 auto& descriptors = base.storage_buffers_descriptors;
513 for (auto& desc : source.storage_buffers_descriptors) {
514 auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
515 return desc.cbuf_index == existing.cbuf_index &&
516 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
517 })};
518 if (it != descriptors.end()) {
519 it->is_written |= desc.is_written;
520 continue;
521 }
522 descriptors.push_back(desc);
523 }
524}
525
526} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..e9b55f835
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "shader_recompiler/frontend/ir/basic_block.h"
8#include "shader_recompiler/frontend/ir/value.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12
13void IdentityRemovalPass(IR::Program& program) {
14 std::vector<IR::Inst*> to_invalidate;
15 for (IR::Block* const block : program.blocks) {
16 for (auto inst = block->begin(); inst != block->end();) {
17 const size_t num_args{inst->NumArgs()};
18 for (size_t i = 0; i < num_args; ++i) {
19 IR::Value arg;
20 while ((arg = inst->Arg(i)).IsIdentity()) {
21 inst->SetArg(i, arg.Inst()->Arg(0));
22 }
23 }
24 if (inst->GetOpcode() == IR::Opcode::Identity ||
25 inst->GetOpcode() == IR::Opcode::Void) {
26 to_invalidate.push_back(&*inst);
27 inst = block->Instructions().erase(inst);
28 } else {
29 ++inst;
30 }
31 }
32 }
33 for (IR::Inst* const inst : to_invalidate) {
34 inst->Invalidate();
35 }
36}
37
38} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 000000000..773e1f961
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/value.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12namespace {
13IR::Opcode Replace(IR::Opcode op) {
14 switch (op) {
15 case IR::Opcode::FPAbs16:
16 return IR::Opcode::FPAbs32;
17 case IR::Opcode::FPAdd16:
18 return IR::Opcode::FPAdd32;
19 case IR::Opcode::FPCeil16:
20 return IR::Opcode::FPCeil32;
21 case IR::Opcode::FPFloor16:
22 return IR::Opcode::FPFloor32;
23 case IR::Opcode::FPFma16:
24 return IR::Opcode::FPFma32;
25 case IR::Opcode::FPMul16:
26 return IR::Opcode::FPMul32;
27 case IR::Opcode::FPNeg16:
28 return IR::Opcode::FPNeg32;
29 case IR::Opcode::FPRoundEven16:
30 return IR::Opcode::FPRoundEven32;
31 case IR::Opcode::FPSaturate16:
32 return IR::Opcode::FPSaturate32;
33 case IR::Opcode::FPClamp16:
34 return IR::Opcode::FPClamp32;
35 case IR::Opcode::FPTrunc16:
36 return IR::Opcode::FPTrunc32;
37 case IR::Opcode::CompositeConstructF16x2:
38 return IR::Opcode::CompositeConstructF32x2;
39 case IR::Opcode::CompositeConstructF16x3:
40 return IR::Opcode::CompositeConstructF32x3;
41 case IR::Opcode::CompositeConstructF16x4:
42 return IR::Opcode::CompositeConstructF32x4;
43 case IR::Opcode::CompositeExtractF16x2:
44 return IR::Opcode::CompositeExtractF32x2;
45 case IR::Opcode::CompositeExtractF16x3:
46 return IR::Opcode::CompositeExtractF32x3;
47 case IR::Opcode::CompositeExtractF16x4:
48 return IR::Opcode::CompositeExtractF32x4;
49 case IR::Opcode::CompositeInsertF16x2:
50 return IR::Opcode::CompositeInsertF32x2;
51 case IR::Opcode::CompositeInsertF16x3:
52 return IR::Opcode::CompositeInsertF32x3;
53 case IR::Opcode::CompositeInsertF16x4:
54 return IR::Opcode::CompositeInsertF32x4;
55 case IR::Opcode::FPOrdEqual16:
56 return IR::Opcode::FPOrdEqual32;
57 case IR::Opcode::FPUnordEqual16:
58 return IR::Opcode::FPUnordEqual32;
59 case IR::Opcode::FPOrdNotEqual16:
60 return IR::Opcode::FPOrdNotEqual32;
61 case IR::Opcode::FPUnordNotEqual16:
62 return IR::Opcode::FPUnordNotEqual32;
63 case IR::Opcode::FPOrdLessThan16:
64 return IR::Opcode::FPOrdLessThan32;
65 case IR::Opcode::FPUnordLessThan16:
66 return IR::Opcode::FPUnordLessThan32;
67 case IR::Opcode::FPOrdGreaterThan16:
68 return IR::Opcode::FPOrdGreaterThan32;
69 case IR::Opcode::FPUnordGreaterThan16:
70 return IR::Opcode::FPUnordGreaterThan32;
71 case IR::Opcode::FPOrdLessThanEqual16:
72 return IR::Opcode::FPOrdLessThanEqual32;
73 case IR::Opcode::FPUnordLessThanEqual16:
74 return IR::Opcode::FPUnordLessThanEqual32;
75 case IR::Opcode::FPOrdGreaterThanEqual16:
76 return IR::Opcode::FPOrdGreaterThanEqual32;
77 case IR::Opcode::FPUnordGreaterThanEqual16:
78 return IR::Opcode::FPUnordGreaterThanEqual32;
79 case IR::Opcode::FPIsNan16:
80 return IR::Opcode::FPIsNan32;
81 case IR::Opcode::ConvertS16F16:
82 return IR::Opcode::ConvertS16F32;
83 case IR::Opcode::ConvertS32F16:
84 return IR::Opcode::ConvertS32F32;
85 case IR::Opcode::ConvertS64F16:
86 return IR::Opcode::ConvertS64F32;
87 case IR::Opcode::ConvertU16F16:
88 return IR::Opcode::ConvertU16F32;
89 case IR::Opcode::ConvertU32F16:
90 return IR::Opcode::ConvertU32F32;
91 case IR::Opcode::ConvertU64F16:
92 return IR::Opcode::ConvertU64F32;
93 case IR::Opcode::PackFloat2x16:
94 return IR::Opcode::PackHalf2x16;
95 case IR::Opcode::UnpackFloat2x16:
96 return IR::Opcode::UnpackHalf2x16;
97 case IR::Opcode::ConvertF32F16:
98 return IR::Opcode::Identity;
99 case IR::Opcode::ConvertF16F32:
100 return IR::Opcode::Identity;
101 case IR::Opcode::ConvertF16S8:
102 return IR::Opcode::ConvertF32S8;
103 case IR::Opcode::ConvertF16S16:
104 return IR::Opcode::ConvertF32S16;
105 case IR::Opcode::ConvertF16S32:
106 return IR::Opcode::ConvertF32S32;
107 case IR::Opcode::ConvertF16S64:
108 return IR::Opcode::ConvertF32S64;
109 case IR::Opcode::ConvertF16U8:
110 return IR::Opcode::ConvertF32U8;
111 case IR::Opcode::ConvertF16U16:
112 return IR::Opcode::ConvertF32U16;
113 case IR::Opcode::ConvertF16U32:
114 return IR::Opcode::ConvertF32U32;
115 case IR::Opcode::ConvertF16U64:
116 return IR::Opcode::ConvertF32U64;
117 case IR::Opcode::GlobalAtomicAddF16x2:
118 return IR::Opcode::GlobalAtomicAddF32x2;
119 case IR::Opcode::StorageAtomicAddF16x2:
120 return IR::Opcode::StorageAtomicAddF32x2;
121 case IR::Opcode::GlobalAtomicMinF16x2:
122 return IR::Opcode::GlobalAtomicMinF32x2;
123 case IR::Opcode::StorageAtomicMinF16x2:
124 return IR::Opcode::StorageAtomicMinF32x2;
125 case IR::Opcode::GlobalAtomicMaxF16x2:
126 return IR::Opcode::GlobalAtomicMaxF32x2;
127 case IR::Opcode::StorageAtomicMaxF16x2:
128 return IR::Opcode::StorageAtomicMaxF32x2;
129 default:
130 return op;
131 }
132}
133} // Anonymous namespace
134
135void LowerFp16ToFp32(IR::Program& program) {
136 for (IR::Block* const block : program.blocks) {
137 for (IR::Inst& inst : block->Instructions()) {
138 inst.ReplaceOpcode(Replace(inst.GetOpcode()));
139 }
140 }
141}
142
143} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..e80d3d1d9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/frontend/ir/value.h"
12#include "shader_recompiler/ir_opt/passes.h"
13
14namespace Shader::Optimization {
15namespace {
16std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
17 if (packed.IsImmediate()) {
18 const u64 value{packed.U64()};
19 return {
20 ir.Imm32(static_cast<u32>(value)),
21 ir.Imm32(static_cast<u32>(value >> 32)),
22 };
23 } else {
24 return std::pair<IR::U32, IR::U32>{
25 ir.CompositeExtract(packed, 0u),
26 ir.CompositeExtract(packed, 1u),
27 };
28 }
29}
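Unpack models every 64-bit value as a pair of 32-bit halves, either as two immediates or as extracts of the U32x2 composites the lowering produces. The scalar round trip it corresponds to, assuming the codebase's u32/u64 aliases (illustrative):

// Splits a 64-bit value into (low, high) 32-bit halves and recombines them losslessly.
constexpr std::pair<u32, u32> SplitU64(u64 value) {
    return {static_cast<u32>(value), static_cast<u32>(value >> 32)};
}
constexpr u64 JoinU64(u32 lo, u32 hi) {
    return static_cast<u64>(lo) | (static_cast<u64>(hi) << 32);
}
static_assert(JoinU64(SplitU64(0x0123456789abcdefULL).first,
                      SplitU64(0x0123456789abcdefULL).second) == 0x0123456789abcdefULL);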
30
31void IAdd64To32(IR::Block& block, IR::Inst& inst) {
32 if (inst.HasAssociatedPseudoOperation()) {
33 throw NotImplementedException("IAdd64 emulation with pseudo instructions");
34 }
35 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
36 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
37 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
38
39 const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
40 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
41
42 const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
43 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
44}
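A scalar reference for the carry handling above: the low halves are added first and the carry out of that addition is folded into the sum of the high halves. Illustrative only, assuming the codebase's u32 alias:

// 64-bit addition built from 32-bit pieces, mirroring IAdd64To32's use of GetCarryFromOp.
constexpr std::pair<u32, u32> Add64Via32(u32 a_lo, u32 a_hi, u32 b_lo, u32 b_hi) {
    const u32 ret_lo{a_lo + b_lo};
    const u32 carry{ret_lo < a_lo ? 1u : 0u}; // unsigned wrap-around signals the carry out
    const u32 ret_hi{a_hi + b_hi + carry};
    return {ret_lo, ret_hi};
}
static_assert(Add64Via32(0xffffffffu, 0u, 1u, 0u) == std::pair<u32, u32>{0u, 1u});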
45
46void ISub64To32(IR::Block& block, IR::Inst& inst) {
47 if (inst.HasAssociatedPseudoOperation()) {
48 throw NotImplementedException("ISub64 emulation with pseudo instructions");
49 }
50 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
51 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
52 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
53
54 const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
55 const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
56 const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
57
58 const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
59 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
60}
61
62void INeg64To32(IR::Block& block, IR::Inst& inst) {
63 if (inst.HasAssociatedPseudoOperation()) {
64 throw NotImplementedException("INeg64 emulation with pseudo instructions");
65 }
66 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
67 auto [lo, hi]{Unpack(ir, inst.Arg(0))};
68 lo = ir.BitwiseNot(lo);
69 hi = ir.BitwiseNot(hi);
70
71 lo = ir.IAdd(lo, ir.Imm32(1));
72
73 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
74 hi = ir.IAdd(hi, carry);
75
76 inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
77}
78
79void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
80 if (inst.HasAssociatedPseudoOperation()) {
81 throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
82 }
83 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
84 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
85 const IR::U32 shift{inst.Arg(1)};
86
87 const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
88 const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
89
90 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
91 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
92 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
93
94 const IR::U32 long_ret_lo{ir.Imm32(0)};
95 const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
96
97 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
98 const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
99 const IR::U32 short_ret_lo{shifted_lo};
100 const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
101
102 const IR::U32 zero_ret_lo{lo};
103 const IR::U32 zero_ret_hi{hi};
104
105 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
106 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
107
108 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
109 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
110 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
111}
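The same three-way split in scalar form: a zero shift returns the input unchanged, a shift below 32 spills the top bits of the low half into the high half, and a shift of 32 or more moves the shifted low half into the high half. A sketch assuming 0 <= shift < 64 and the codebase's u32 alias:

// 64-bit logical left shift out of 32-bit operations, mirroring ShiftLeftLogical64To32's selects.
constexpr std::pair<u32, u32> Shl64Via32(u32 lo, u32 hi, u32 shift) {
    if (shift == 0) {
        return {lo, hi};
    }
    if (shift >= 32) {
        return {0u, lo << (shift - 32)};
    }
    return {lo << shift, (hi << shift) | (lo >> (32 - shift))};
}
static_assert(Shl64Via32(0x80000001u, 0u, 1u) == std::pair<u32, u32>{0x00000002u, 1u});
static_assert(Shl64Via32(0x12345678u, 0u, 32u) == std::pair<u32, u32>{0u, 0x12345678u});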
112
113void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
114 if (inst.HasAssociatedPseudoOperation()) {
115 throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
116 }
117 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
118 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
119 const IR::U32 shift{inst.Arg(1)};
120
121 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
122 const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
123
124 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
125 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
126 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
127
128 const IR::U32 long_ret_hi{ir.Imm32(0)};
129 const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
130
131 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
132 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
133 const IR::U32 short_ret_hi{shifted_hi};
134 const IR::U32 short_ret_lo{
135 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
136
137 const IR::U32 zero_ret_lo{lo};
138 const IR::U32 zero_ret_hi{hi};
139
140 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
141 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
142
143 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
144 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
145 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
146}
147
148void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
149 if (inst.HasAssociatedPseudoOperation()) {
150 throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
151 }
152 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
153 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
154 const IR::U32 shift{inst.Arg(1)};
155
156 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
157 const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
158
159 const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
160
161 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
162 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
163 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
164
165 const IR::U32 long_ret_hi{sign_extension};
166 const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
167
168 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
169 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
170 const IR::U32 short_ret_hi{shifted_hi};
171 const IR::U32 short_ret_lo{
172 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
173
174 const IR::U32 zero_ret_lo{lo};
175 const IR::U32 zero_ret_hi{hi};
176
177 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
178 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
179
180 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
181 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
182 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
183}
184
185void Lower(IR::Block& block, IR::Inst& inst) {
186 switch (inst.GetOpcode()) {
187 case IR::Opcode::PackUint2x32:
188 case IR::Opcode::UnpackUint2x32:
189 return inst.ReplaceOpcode(IR::Opcode::Identity);
190 case IR::Opcode::IAdd64:
191 return IAdd64To32(block, inst);
192 case IR::Opcode::ISub64:
193 return ISub64To32(block, inst);
194 case IR::Opcode::INeg64:
195 return INeg64To32(block, inst);
196 case IR::Opcode::ShiftLeftLogical64:
197 return ShiftLeftLogical64To32(block, inst);
198 case IR::Opcode::ShiftRightLogical64:
199 return ShiftRightLogical64To32(block, inst);
200 case IR::Opcode::ShiftRightArithmetic64:
201 return ShiftRightArithmetic64To32(block, inst);
202 default:
203 break;
204 }
205}
206} // Anonymous namespace
207
208void LowerInt64ToInt32(IR::Program& program) {
209 const auto end{program.post_order_blocks.rend()};
210 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
211 IR::Block* const block{*it};
212 for (IR::Inst& inst : block->Instructions()) {
213 Lower(*block, inst);
214 }
215 }
216}
217
218} // namespace Shader::Optimization
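A hypothetical call site for the pass above (illustrative only; the actual invocation lives in the frontend and is not part of this diff). The lowering only pays off when the target profile, declared later in this change, lacks native 64-bit integer support:

    if (!profile.support_int64) {
        Shader::Optimization::LowerInt64ToInt32(program);
    }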
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..2f89b1ea0
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "shader_recompiler/environment.h"
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/program.h"
12
13namespace Shader::Optimization {
14
15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program);
22void SsaRewritePass(IR::Program& program);
23void TexturePass(Environment& env, IR::Program& program);
24void VerificationPass(const IR::Program& program);
25
26// Dual Vertex
27void VertexATransformPass(IR::Program& program);
28void VertexBTransformPass(IR::Program& program);
29void JoinTextureInfo(Info& base, Info& source);
30void JoinStorageInfo(Info& base, Info& source);
31
32} // namespace Shader::Optimization
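For orientation, one plausible ordering in which a frontend could drive these passes (an illustrative sketch; the real pipeline, including when LowerFp16ToFp32 and LowerInt64ToInt32 are inserted based on the Profile, is defined outside this diff):

    void RunOptimizations(Shader::Environment& env, Shader::IR::Program& program) {
        using namespace Shader::Optimization;
        SsaRewritePass(program);
        GlobalMemoryToStorageBufferPass(program);
        TexturePass(env, program);
        ConstantPropagationPass(program);
        IdentityRemovalPass(program);
        DeadCodeEliminationPass(program);
        VerificationPass(program);
        CollectShaderInfoPass(env, program);
    }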
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
new file mode 100644
index 000000000..53145fb5e
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -0,0 +1,383 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file implements the SSA rewriting algorithm proposed in
6//
7// Simple and Efficient Construction of Static Single Assignment Form.
8// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
9// In: Jhala R., De Bosschere K. (eds)
10// Compiler Construction. CC 2013.
11// Lecture Notes in Computer Science, vol 7791.
12// Springer, Berlin, Heidelberg
13//
14// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
15//
16
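// In the paper's terms, the Pass class below supplies writeVariable/readVariable: local
// definitions are looked up directly, otherwise the value is requested from the block's
// predecessors and merged through a phi, which is removed again if it turns out to be
// trivial (all operands equal or self-referencing).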
17#include <span>
18#include <variant>
19#include <vector>
20
21#include <boost/container/flat_map.hpp>
22#include <boost/container/flat_set.hpp>
23
24#include "shader_recompiler/frontend/ir/basic_block.h"
25#include "shader_recompiler/frontend/ir/opcodes.h"
26#include "shader_recompiler/frontend/ir/pred.h"
27#include "shader_recompiler/frontend/ir/reg.h"
28#include "shader_recompiler/frontend/ir/value.h"
29#include "shader_recompiler/ir_opt/passes.h"
30
31namespace Shader::Optimization {
32namespace {
33struct FlagTag {
34 auto operator<=>(const FlagTag&) const noexcept = default;
35};
36struct ZeroFlagTag : FlagTag {};
37struct SignFlagTag : FlagTag {};
38struct CarryFlagTag : FlagTag {};
39struct OverflowFlagTag : FlagTag {};
40
41struct GotoVariable : FlagTag {
42 GotoVariable() = default;
43 explicit GotoVariable(u32 index_) : index{index_} {}
44
45 auto operator<=>(const GotoVariable&) const noexcept = default;
46
47 u32 index;
48};
49
50struct IndirectBranchVariable {
51 auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
52};
53
54using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
55 OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
56using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
57
58struct DefTable {
59 const IR::Value& Def(IR::Block* block, IR::Reg variable) {
60 return block->SsaRegValue(variable);
61 }
62 void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
63 block->SetSsaRegValue(variable, value);
64 }
65
66 const IR::Value& Def(IR::Block* block, IR::Pred variable) {
67 return preds[IR::PredIndex(variable)][block];
68 }
69 void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
70 preds[IR::PredIndex(variable)].insert_or_assign(block, value);
71 }
72
73 const IR::Value& Def(IR::Block* block, GotoVariable variable) {
74 return goto_vars[variable.index][block];
75 }
76 void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
77 goto_vars[variable.index].insert_or_assign(block, value);
78 }
79
80 const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
81 return indirect_branch_var[block];
82 }
83 void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
84 indirect_branch_var.insert_or_assign(block, value);
85 }
86
87 const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
88 return zero_flag[block];
89 }
90 void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
91 zero_flag.insert_or_assign(block, value);
92 }
93
94 const IR::Value& Def(IR::Block* block, SignFlagTag) {
95 return sign_flag[block];
96 }
97 void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
98 sign_flag.insert_or_assign(block, value);
99 }
100
101 const IR::Value& Def(IR::Block* block, CarryFlagTag) {
102 return carry_flag[block];
103 }
104 void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
105 carry_flag.insert_or_assign(block, value);
106 }
107
108 const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
109 return overflow_flag[block];
110 }
111 void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
112 overflow_flag.insert_or_assign(block, value);
113 }
114
115 std::array<ValueMap, IR::NUM_USER_PREDS> preds;
116 boost::container::flat_map<u32, ValueMap> goto_vars;
117 ValueMap indirect_branch_var;
118 ValueMap zero_flag;
119 ValueMap sign_flag;
120 ValueMap carry_flag;
121 ValueMap overflow_flag;
122};
123
124IR::Opcode UndefOpcode(IR::Reg) noexcept {
125 return IR::Opcode::UndefU32;
126}
127
128IR::Opcode UndefOpcode(IR::Pred) noexcept {
129 return IR::Opcode::UndefU1;
130}
131
132IR::Opcode UndefOpcode(const FlagTag&) noexcept {
133 return IR::Opcode::UndefU1;
134}
135
136IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
137 return IR::Opcode::UndefU32;
138}
139
140enum class Status {
141 Start,
142 SetValue,
143 PreparePhiArgument,
144 PushPhiArgument,
145};
146
147template <typename Type>
148struct ReadState {
149 ReadState(IR::Block* block_) : block{block_} {}
150 ReadState() = default;
151
152 IR::Block* block{};
153 IR::Value result{};
154 IR::Inst* phi{};
155 IR::Block* const* pred_it{};
156 IR::Block* const* pred_end{};
157 Status pc{Status::Start};
158};
159
160class Pass {
161public:
162 template <typename Type>
163 void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
164 current_def.SetDef(block, variable, value);
165 }
166
167 template <typename Type>
168 IR::Value ReadVariable(Type variable, IR::Block* root_block) {
169 boost::container::small_vector<ReadState<Type>, 64> stack{
170 ReadState<Type>(nullptr),
171 ReadState<Type>(root_block),
172 };
173 const auto prepare_phi_operand{[&] {
174 if (stack.back().pred_it == stack.back().pred_end) {
175 IR::Inst* const phi{stack.back().phi};
176 IR::Block* const block{stack.back().block};
177 const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
178 stack.pop_back();
179 stack.back().result = result;
180 WriteVariable(variable, block, result);
181 } else {
182 IR::Block* const imm_pred{*stack.back().pred_it};
183 stack.back().pc = Status::PushPhiArgument;
184 stack.emplace_back(imm_pred);
185 }
186 }};
187 do {
188 IR::Block* const block{stack.back().block};
189 switch (stack.back().pc) {
190 case Status::Start: {
191 if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
192 stack.back().result = def;
193 } else if (!block->IsSsaSealed()) {
194 // Incomplete CFG
195 IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
196 phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
197
198 incomplete_phis[block].insert_or_assign(variable, phi);
199 stack.back().result = IR::Value{&*phi};
200 } else if (const std::span imm_preds = block->ImmPredecessors();
201 imm_preds.size() == 1) {
202 // Optimize the common case of one predecessor: no phi needed
203 stack.back().pc = Status::SetValue;
204 stack.emplace_back(imm_preds.front());
205 break;
206 } else {
207 // Break potential cycles with operandless phi
208 IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
209 phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
210
211 WriteVariable(variable, block, IR::Value{phi});
212
213 stack.back().phi = phi;
214 stack.back().pred_it = imm_preds.data();
215 stack.back().pred_end = imm_preds.data() + imm_preds.size();
216 prepare_phi_operand();
217 break;
218 }
219 }
220 [[fallthrough]];
221 case Status::SetValue: {
222 const IR::Value result{stack.back().result};
223 WriteVariable(variable, block, result);
224 stack.pop_back();
225 stack.back().result = result;
226 break;
227 }
228 case Status::PushPhiArgument: {
229 IR::Inst* const phi{stack.back().phi};
230 phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
231 ++stack.back().pred_it;
232 }
233 [[fallthrough]];
234 case Status::PreparePhiArgument:
235 prepare_phi_operand();
236 break;
237 }
238 } while (stack.size() > 1);
239 return stack.back().result;
240 }
241
242 void SealBlock(IR::Block* block) {
243 const auto it{incomplete_phis.find(block)};
244 if (it != incomplete_phis.end()) {
245 for (auto& pair : it->second) {
246 auto& variant{pair.first};
247 auto& phi{pair.second};
248 std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
249 }
250 }
251 block->SsaSeal();
252 }
253
254private:
255 template <typename Type>
256 IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
257 for (IR::Block* const imm_pred : block->ImmPredecessors()) {
258 phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
259 }
260 return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
261 }
262
263 IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
264 IR::Value same;
265 const size_t num_args{phi.NumArgs()};
266 for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
267 const IR::Value& op{phi.Arg(arg_index)};
268 if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
269 // Unique value or self-reference
270 continue;
271 }
272 if (!same.IsEmpty()) {
273 // The phi merges at least two values: not trivial
274 return IR::Value{&phi};
275 }
276 same = op;
277 }
278 // Remove the phi node from the block, it will be reinserted
279 IR::Block::InstructionList& list{block->Instructions()};
280 list.erase(IR::Block::InstructionList::s_iterator_to(phi));
281
282 // Find the first non-phi instruction and use it as an insertion point
283 IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
284 if (same.IsEmpty()) {
285 // The phi is unreachable or in the start block
286 // Insert an undefined instruction and make it the phi node replacement
287 // The "phi" node reinsertion point is specified after this instruction
288 reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
289 same = IR::Value{&*reinsert_point};
290 ++reinsert_point;
291 }
292 // Reinsert the phi node and reroute all its uses to the "same" value
293 list.insert(reinsert_point, phi);
294 phi.ReplaceUsesWith(same);
295 // TODO: Try to recursively remove all phi users, which might have become trivial
296 return same;
297 }
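    // Illustrative example: a phi of the form
    //   %phi = Phi [%x, pred_a], [%x, pred_b], [%phi, back_edge]
    // only ever observes %x, so every use of %phi is rerouted to %x by the code above.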
298
299 boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
300 incomplete_phis;
301 DefTable current_def;
302};
303
304void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
305 switch (inst.GetOpcode()) {
306 case IR::Opcode::SetRegister:
307 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
308 pass.WriteVariable(reg, block, inst.Arg(1));
309 }
310 break;
311 case IR::Opcode::SetPred:
312 if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
313 pass.WriteVariable(pred, block, inst.Arg(1));
314 }
315 break;
316 case IR::Opcode::SetGotoVariable:
317 pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
318 break;
319 case IR::Opcode::SetIndirectBranchVariable:
320 pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
321 break;
322 case IR::Opcode::SetZFlag:
323 pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
324 break;
325 case IR::Opcode::SetSFlag:
326 pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
327 break;
328 case IR::Opcode::SetCFlag:
329 pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
330 break;
331 case IR::Opcode::SetOFlag:
332 pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
333 break;
334 case IR::Opcode::GetRegister:
335 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
336 inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
337 }
338 break;
339 case IR::Opcode::GetPred:
340 if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
341 inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
342 }
343 break;
344 case IR::Opcode::GetGotoVariable:
345 inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
346 break;
347 case IR::Opcode::GetIndirectBranchVariable:
348 inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
349 break;
350 case IR::Opcode::GetZFlag:
351 inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
352 break;
353 case IR::Opcode::GetSFlag:
354 inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
355 break;
356 case IR::Opcode::GetCFlag:
357 inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
358 break;
359 case IR::Opcode::GetOFlag:
360 inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
361 break;
362 default:
363 break;
364 }
365}
366
367void VisitBlock(Pass& pass, IR::Block* block) {
368 for (IR::Inst& inst : block->Instructions()) {
369 VisitInst(pass, block, inst);
370 }
371 pass.SealBlock(block);
372}
373} // Anonymous namespace
374
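// Note: blocks are visited in reverse post order below, so (back edges aside) a block's
// predecessors are sealed before the block itself, keeping incomplete phis rare.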
375void SsaRewritePass(IR::Program& program) {
376 Pass pass;
377 const auto end{program.post_order_blocks.rend()};
378 for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
379 VisitBlock(pass, *block);
380 }
381}
382
383} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..44ad10d43
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,523 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <bit>
7#include <optional>
8
9#include <boost/container/small_vector.hpp>
10
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/basic_block.h"
13#include "shader_recompiler/frontend/ir/breadth_first_search.h"
14#include "shader_recompiler/frontend/ir/ir_emitter.h"
15#include "shader_recompiler/ir_opt/passes.h"
16#include "shader_recompiler/shader_info.h"
17
18namespace Shader::Optimization {
19namespace {
20struct ConstBufferAddr {
21 u32 index;
22 u32 offset;
23 u32 secondary_index;
24 u32 secondary_offset;
25 IR::U32 dynamic_offset;
26 u32 count;
27 bool has_secondary;
28};
29
30struct TextureInst {
31 ConstBufferAddr cbuf;
32 IR::Inst* inst;
33 IR::Block* block;
34};
35
36using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
37
38constexpr u32 DESCRIPTOR_SIZE = 8;
39constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
40
41IR::Opcode IndexedInstruction(const IR::Inst& inst) {
42 switch (inst.GetOpcode()) {
43 case IR::Opcode::BindlessImageSampleImplicitLod:
44 case IR::Opcode::BoundImageSampleImplicitLod:
45 return IR::Opcode::ImageSampleImplicitLod;
46 case IR::Opcode::BoundImageSampleExplicitLod:
47 case IR::Opcode::BindlessImageSampleExplicitLod:
48 return IR::Opcode::ImageSampleExplicitLod;
49 case IR::Opcode::BoundImageSampleDrefImplicitLod:
50 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
51 return IR::Opcode::ImageSampleDrefImplicitLod;
52 case IR::Opcode::BoundImageSampleDrefExplicitLod:
53 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
54 return IR::Opcode::ImageSampleDrefExplicitLod;
55 case IR::Opcode::BindlessImageGather:
56 case IR::Opcode::BoundImageGather:
57 return IR::Opcode::ImageGather;
58 case IR::Opcode::BindlessImageGatherDref:
59 case IR::Opcode::BoundImageGatherDref:
60 return IR::Opcode::ImageGatherDref;
61 case IR::Opcode::BindlessImageFetch:
62 case IR::Opcode::BoundImageFetch:
63 return IR::Opcode::ImageFetch;
64 case IR::Opcode::BoundImageQueryDimensions:
65 case IR::Opcode::BindlessImageQueryDimensions:
66 return IR::Opcode::ImageQueryDimensions;
67 case IR::Opcode::BoundImageQueryLod:
68 case IR::Opcode::BindlessImageQueryLod:
69 return IR::Opcode::ImageQueryLod;
70 case IR::Opcode::BoundImageGradient:
71 case IR::Opcode::BindlessImageGradient:
72 return IR::Opcode::ImageGradient;
73 case IR::Opcode::BoundImageRead:
74 case IR::Opcode::BindlessImageRead:
75 return IR::Opcode::ImageRead;
76 case IR::Opcode::BoundImageWrite:
77 case IR::Opcode::BindlessImageWrite:
78 return IR::Opcode::ImageWrite;
79 case IR::Opcode::BoundImageAtomicIAdd32:
80 case IR::Opcode::BindlessImageAtomicIAdd32:
81 return IR::Opcode::ImageAtomicIAdd32;
82 case IR::Opcode::BoundImageAtomicSMin32:
83 case IR::Opcode::BindlessImageAtomicSMin32:
84 return IR::Opcode::ImageAtomicSMin32;
85 case IR::Opcode::BoundImageAtomicUMin32:
86 case IR::Opcode::BindlessImageAtomicUMin32:
87 return IR::Opcode::ImageAtomicUMin32;
88 case IR::Opcode::BoundImageAtomicSMax32:
89 case IR::Opcode::BindlessImageAtomicSMax32:
90 return IR::Opcode::ImageAtomicSMax32;
91 case IR::Opcode::BoundImageAtomicUMax32:
92 case IR::Opcode::BindlessImageAtomicUMax32:
93 return IR::Opcode::ImageAtomicUMax32;
94 case IR::Opcode::BoundImageAtomicInc32:
95 case IR::Opcode::BindlessImageAtomicInc32:
96 return IR::Opcode::ImageAtomicInc32;
97 case IR::Opcode::BoundImageAtomicDec32:
98 case IR::Opcode::BindlessImageAtomicDec32:
99 return IR::Opcode::ImageAtomicDec32;
100 case IR::Opcode::BoundImageAtomicAnd32:
101 case IR::Opcode::BindlessImageAtomicAnd32:
102 return IR::Opcode::ImageAtomicAnd32;
103 case IR::Opcode::BoundImageAtomicOr32:
104 case IR::Opcode::BindlessImageAtomicOr32:
105 return IR::Opcode::ImageAtomicOr32;
106 case IR::Opcode::BoundImageAtomicXor32:
107 case IR::Opcode::BindlessImageAtomicXor32:
108 return IR::Opcode::ImageAtomicXor32;
109 case IR::Opcode::BoundImageAtomicExchange32:
110 case IR::Opcode::BindlessImageAtomicExchange32:
111 return IR::Opcode::ImageAtomicExchange32;
112 default:
113 return IR::Opcode::Void;
114 }
115}
116
117bool IsBindless(const IR::Inst& inst) {
118 switch (inst.GetOpcode()) {
119 case IR::Opcode::BindlessImageSampleImplicitLod:
120 case IR::Opcode::BindlessImageSampleExplicitLod:
121 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
122 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
123 case IR::Opcode::BindlessImageGather:
124 case IR::Opcode::BindlessImageGatherDref:
125 case IR::Opcode::BindlessImageFetch:
126 case IR::Opcode::BindlessImageQueryDimensions:
127 case IR::Opcode::BindlessImageQueryLod:
128 case IR::Opcode::BindlessImageGradient:
129 case IR::Opcode::BindlessImageRead:
130 case IR::Opcode::BindlessImageWrite:
131 case IR::Opcode::BindlessImageAtomicIAdd32:
132 case IR::Opcode::BindlessImageAtomicSMin32:
133 case IR::Opcode::BindlessImageAtomicUMin32:
134 case IR::Opcode::BindlessImageAtomicSMax32:
135 case IR::Opcode::BindlessImageAtomicUMax32:
136 case IR::Opcode::BindlessImageAtomicInc32:
137 case IR::Opcode::BindlessImageAtomicDec32:
138 case IR::Opcode::BindlessImageAtomicAnd32:
139 case IR::Opcode::BindlessImageAtomicOr32:
140 case IR::Opcode::BindlessImageAtomicXor32:
141 case IR::Opcode::BindlessImageAtomicExchange32:
142 return true;
143 case IR::Opcode::BoundImageSampleImplicitLod:
144 case IR::Opcode::BoundImageSampleExplicitLod:
145 case IR::Opcode::BoundImageSampleDrefImplicitLod:
146 case IR::Opcode::BoundImageSampleDrefExplicitLod:
147 case IR::Opcode::BoundImageGather:
148 case IR::Opcode::BoundImageGatherDref:
149 case IR::Opcode::BoundImageFetch:
150 case IR::Opcode::BoundImageQueryDimensions:
151 case IR::Opcode::BoundImageQueryLod:
152 case IR::Opcode::BoundImageGradient:
153 case IR::Opcode::BoundImageRead:
154 case IR::Opcode::BoundImageWrite:
155 case IR::Opcode::BoundImageAtomicIAdd32:
156 case IR::Opcode::BoundImageAtomicSMin32:
157 case IR::Opcode::BoundImageAtomicUMin32:
158 case IR::Opcode::BoundImageAtomicSMax32:
159 case IR::Opcode::BoundImageAtomicUMax32:
160 case IR::Opcode::BoundImageAtomicInc32:
161 case IR::Opcode::BoundImageAtomicDec32:
162 case IR::Opcode::BoundImageAtomicAnd32:
163 case IR::Opcode::BoundImageAtomicOr32:
164 case IR::Opcode::BoundImageAtomicXor32:
165 case IR::Opcode::BoundImageAtomicExchange32:
166 return false;
167 default:
168 throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
169 }
170}
171
172bool IsTextureInstruction(const IR::Inst& inst) {
173 return IndexedInstruction(inst) != IR::Opcode::Void;
174}
175
176std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
177
178std::optional<ConstBufferAddr> Track(const IR::Value& value) {
179 return IR::BreadthFirstSearch(value, TryGetConstBuffer);
180}
181
182std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
183 switch (inst->GetOpcode()) {
184 default:
185 return std::nullopt;
186 case IR::Opcode::BitwiseOr32: {
187 std::optional lhs{Track(inst->Arg(0))};
188 std::optional rhs{Track(inst->Arg(1))};
189 if (!lhs || !rhs) {
190 return std::nullopt;
191 }
192 if (lhs->has_secondary || rhs->has_secondary) {
193 return std::nullopt;
194 }
195 if (lhs->count > 1 || rhs->count > 1) {
196 return std::nullopt;
197 }
198 if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
199 std::swap(lhs, rhs);
200 }
201 return ConstBufferAddr{
202 .index = lhs->index,
203 .offset = lhs->offset,
204 .secondary_index = rhs->index,
205 .secondary_offset = rhs->offset,
206 .dynamic_offset = {},
207 .count = 1,
208 .has_secondary = true,
209 };
210 }
211 case IR::Opcode::GetCbufU32x2:
212 case IR::Opcode::GetCbufU32:
213 break;
214 }
215 const IR::Value index{inst->Arg(0)};
216 const IR::Value offset{inst->Arg(1)};
217 if (!index.IsImmediate()) {
218 // Reading a bindless texture from variable indices is valid
219 // but not supported here at the moment
220 return std::nullopt;
221 }
222 if (offset.IsImmediate()) {
223 return ConstBufferAddr{
224 .index = index.U32(),
225 .offset = offset.U32(),
226 .secondary_index = 0,
227 .secondary_offset = 0,
228 .dynamic_offset = {},
229 .count = 1,
230 .has_secondary = false,
231 };
232 }
233 IR::Inst* const offset_inst{offset.InstRecursive()};
234 if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
235 return std::nullopt;
236 }
237 u32 base_offset{};
238 IR::U32 dynamic_offset;
239 if (offset_inst->Arg(0).IsImmediate()) {
240 base_offset = offset_inst->Arg(0).U32();
241 dynamic_offset = IR::U32{offset_inst->Arg(1)};
242 } else if (offset_inst->Arg(1).IsImmediate()) {
243 base_offset = offset_inst->Arg(1).U32();
244 dynamic_offset = IR::U32{offset_inst->Arg(0)};
245 } else {
246 return std::nullopt;
247 }
248 return ConstBufferAddr{
249 .index = index.U32(),
250 .offset = base_offset,
251 .secondary_index = 0,
252 .secondary_offset = 0,
253 .dynamic_offset = dynamic_offset,
254 .count = 8,
255 .has_secondary = false,
256 };
257}
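// Sketch of the handle patterns recognized above, in pseudo-IR (illustrative naming only):
//   GetCbufU32(index, imm_offset)                     -> single handle, count = 1
//   GetCbufU32(index, IAdd32(imm_base, dynamic))      -> handle array, count = 8
//   BitwiseOr32(GetCbufU32(...), GetCbufU32(...))     -> separate texture/sampler handles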
258
259TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
260 ConstBufferAddr addr;
261 if (IsBindless(inst)) {
262 const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
263 if (!track_addr) {
264 throw NotImplementedException("Failed to track bindless texture constant buffer");
265 }
266 addr = *track_addr;
267 } else {
268 addr = ConstBufferAddr{
269 .index = env.TextureBoundBuffer(),
270 .offset = inst.Arg(0).U32(),
271 .secondary_index = 0,
272 .secondary_offset = 0,
273 .dynamic_offset = {},
274 .count = 1,
275 .has_secondary = false,
276 };
277 }
278 return TextureInst{
279 .cbuf = addr,
280 .inst = &inst,
281 .block = block,
282 };
283}
284
285TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
286 const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
287 const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
288 const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
289 const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
290 return env.ReadTextureType(lhs_raw | rhs_raw);
291}
292
293class Descriptors {
294public:
295 explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
296 ImageBufferDescriptors& image_buffer_descriptors_,
297 TextureDescriptors& texture_descriptors_,
298 ImageDescriptors& image_descriptors_)
299 : texture_buffer_descriptors{texture_buffer_descriptors_},
300 image_buffer_descriptors{image_buffer_descriptors_},
301 texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {}
302
303 u32 Add(const TextureBufferDescriptor& desc) {
304 return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
305 return desc.cbuf_index == existing.cbuf_index &&
306 desc.cbuf_offset == existing.cbuf_offset &&
307 desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
308 desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
309 desc.count == existing.count && desc.size_shift == existing.size_shift &&
310 desc.has_secondary == existing.has_secondary;
311 });
312 }
313
314 u32 Add(const ImageBufferDescriptor& desc) {
315 const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
316 return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
317 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
318 desc.size_shift == existing.size_shift;
319 })};
320 image_buffer_descriptors[index].is_written |= desc.is_written;
321 image_buffer_descriptors[index].is_read |= desc.is_read;
322 return index;
323 }
324
325 u32 Add(const TextureDescriptor& desc) {
326 return Add(texture_descriptors, desc, [&desc](const auto& existing) {
327 return desc.type == existing.type && desc.is_depth == existing.is_depth &&
328 desc.has_secondary == existing.has_secondary &&
329 desc.cbuf_index == existing.cbuf_index &&
330 desc.cbuf_offset == existing.cbuf_offset &&
331 desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
332 desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
333 desc.count == existing.count && desc.size_shift == existing.size_shift;
334 });
335 }
336
337 u32 Add(const ImageDescriptor& desc) {
338 const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
339 return desc.type == existing.type && desc.format == existing.format &&
340 desc.cbuf_index == existing.cbuf_index &&
341 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
342 desc.size_shift == existing.size_shift;
343 })};
344 image_descriptors[index].is_written |= desc.is_written;
345 image_descriptors[index].is_read |= desc.is_read;
346 return index;
347 }
348
349private:
350 template <typename Descriptors, typename Descriptor, typename Func>
351 static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
352 // TODO: Handle arrays
353 const auto it{std::ranges::find_if(descriptors, pred)};
354 if (it != descriptors.end()) {
355 return static_cast<u32>(std::distance(descriptors.begin(), it));
356 }
357 descriptors.push_back(desc);
358 return static_cast<u32>(descriptors.size()) - 1;
359 }
360
361 TextureBufferDescriptors& texture_buffer_descriptors;
362 ImageBufferDescriptors& image_buffer_descriptors;
363 TextureDescriptors& texture_descriptors;
364 ImageDescriptors& image_descriptors;
365};
366} // Anonymous namespace
367
368void TexturePass(Environment& env, IR::Program& program) {
369 TextureInstVector to_replace;
370 for (IR::Block* const block : program.post_order_blocks) {
371 for (IR::Inst& inst : block->Instructions()) {
372 if (!IsTextureInstruction(inst)) {
373 continue;
374 }
375 to_replace.push_back(MakeInst(env, block, inst));
376 }
377 }
378 // Sort instructions to visit textures by constant buffer index, then by offset
379 std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
380 return lhs.cbuf.offset < rhs.cbuf.offset;
381 });
382 std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
383 return lhs.cbuf.index < rhs.cbuf.index;
384 });
385 Descriptors descriptors{
386 program.info.texture_buffer_descriptors,
387 program.info.image_buffer_descriptors,
388 program.info.texture_descriptors,
389 program.info.image_descriptors,
390 };
391 for (TextureInst& texture_inst : to_replace) {
392 // TODO: Handle arrays
393 IR::Inst* const inst{texture_inst.inst};
394 inst->ReplaceOpcode(IndexedInstruction(*inst));
395
396 const auto& cbuf{texture_inst.cbuf};
397 auto flags{inst->Flags<IR::TextureInstInfo>()};
398 switch (inst->GetOpcode()) {
399 case IR::Opcode::ImageQueryDimensions:
400 flags.type.Assign(ReadTextureType(env, cbuf));
401 inst->SetFlags(flags);
402 break;
403 case IR::Opcode::ImageFetch:
404 if (flags.type != TextureType::Color1D) {
405 break;
406 }
407 if (ReadTextureType(env, cbuf) == TextureType::Buffer) {
408 // Replace with the bound texture type only when it's a texture buffer
409 // If the instruction is 1D and the bound type is 2D, don't change the code and let
410 // the rasterizer robustness handle it
411 // This happens on Fire Emblem: Three Houses
412 flags.type.Assign(TextureType::Buffer);
413 }
414 break;
415 default:
416 break;
417 }
418 u32 index;
419 switch (inst->GetOpcode()) {
420 case IR::Opcode::ImageRead:
421 case IR::Opcode::ImageAtomicIAdd32:
422 case IR::Opcode::ImageAtomicSMin32:
423 case IR::Opcode::ImageAtomicUMin32:
424 case IR::Opcode::ImageAtomicSMax32:
425 case IR::Opcode::ImageAtomicUMax32:
426 case IR::Opcode::ImageAtomicInc32:
427 case IR::Opcode::ImageAtomicDec32:
428 case IR::Opcode::ImageAtomicAnd32:
429 case IR::Opcode::ImageAtomicOr32:
430 case IR::Opcode::ImageAtomicXor32:
431 case IR::Opcode::ImageAtomicExchange32:
432 case IR::Opcode::ImageWrite: {
433 if (cbuf.has_secondary) {
434 throw NotImplementedException("Unexpected separate sampler");
435 }
436 const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
437 const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
438 if (flags.type == TextureType::Buffer) {
439 index = descriptors.Add(ImageBufferDescriptor{
440 .format = flags.image_format,
441 .is_written = is_written,
442 .is_read = is_read,
443 .cbuf_index = cbuf.index,
444 .cbuf_offset = cbuf.offset,
445 .count = cbuf.count,
446 .size_shift = DESCRIPTOR_SIZE_SHIFT,
447 });
448 } else {
449 index = descriptors.Add(ImageDescriptor{
450 .type = flags.type,
451 .format = flags.image_format,
452 .is_written = is_written,
453 .is_read = is_read,
454 .cbuf_index = cbuf.index,
455 .cbuf_offset = cbuf.offset,
456 .count = cbuf.count,
457 .size_shift = DESCRIPTOR_SIZE_SHIFT,
458 });
459 }
460 break;
461 }
462 default:
463 if (flags.type == TextureType::Buffer) {
464 index = descriptors.Add(TextureBufferDescriptor{
465 .has_secondary = cbuf.has_secondary,
466 .cbuf_index = cbuf.index,
467 .cbuf_offset = cbuf.offset,
468 .secondary_cbuf_index = cbuf.secondary_index,
469 .secondary_cbuf_offset = cbuf.secondary_offset,
470 .count = cbuf.count,
471 .size_shift = DESCRIPTOR_SIZE_SHIFT,
472 });
473 } else {
474 index = descriptors.Add(TextureDescriptor{
475 .type = flags.type,
476 .is_depth = flags.is_depth != 0,
477 .has_secondary = cbuf.has_secondary,
478 .cbuf_index = cbuf.index,
479 .cbuf_offset = cbuf.offset,
480 .secondary_cbuf_index = cbuf.secondary_index,
481 .secondary_cbuf_offset = cbuf.secondary_offset,
482 .count = cbuf.count,
483 .size_shift = DESCRIPTOR_SIZE_SHIFT,
484 });
485 }
486 break;
487 }
488 flags.descriptor_index.Assign(index);
489 inst->SetFlags(flags);
490
491 if (cbuf.count > 1) {
492 const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
493 IR::IREmitter ir{*texture_inst.block, insert_point};
494 const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
495 inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift));
496 } else {
497 inst->SetArg(0, IR::Value{});
498 }
499 }
500}
501
502void JoinTextureInfo(Info& base, Info& source) {
503 Descriptors descriptors{
504 base.texture_buffer_descriptors,
505 base.image_buffer_descriptors,
506 base.texture_descriptors,
507 base.image_descriptors,
508 };
509 for (auto& desc : source.texture_buffer_descriptors) {
510 descriptors.Add(desc);
511 }
512 for (auto& desc : source.image_buffer_descriptors) {
513 descriptors.Add(desc);
514 }
515 for (auto& desc : source.texture_descriptors) {
516 descriptors.Add(desc);
517 }
518 for (auto& desc : source.image_descriptors) {
519 descriptors.Add(desc);
520 }
521}
522
523} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..975d5aadf
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,98 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <set>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/ir_opt/passes.h"
12
13namespace Shader::Optimization {
14
15static void ValidateTypes(const IR::Program& program) {
16 for (const auto& block : program.blocks) {
17 for (const IR::Inst& inst : *block) {
18 if (inst.GetOpcode() == IR::Opcode::Phi) {
19 // Skip validation on phi nodes
20 continue;
21 }
22 const size_t num_args{inst.NumArgs()};
23 for (size_t i = 0; i < num_args; ++i) {
24 const IR::Type t1{inst.Arg(i).Type()};
25 const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
26 if (!IR::AreTypesCompatible(t1, t2)) {
27 throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
28 }
29 }
30 }
31 }
32}
33
34static void ValidateUses(const IR::Program& program) {
35 std::map<IR::Inst*, int> actual_uses;
36 for (const auto& block : program.blocks) {
37 for (const IR::Inst& inst : *block) {
38 const size_t num_args{inst.NumArgs()};
39 for (size_t i = 0; i < num_args; ++i) {
40 const IR::Value arg{inst.Arg(i)};
41 if (!arg.IsImmediate()) {
42 ++actual_uses[arg.Inst()];
43 }
44 }
45 }
46 }
47 for (const auto [inst, uses] : actual_uses) {
48 if (inst->UseCount() != uses) {
49 throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
50 }
51 }
52}
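// A mismatch above means an instruction's cached use count no longer agrees with its
// actual operands, e.g. a use list that was not updated when an argument was rewritten.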
53
54static void ValidateForwardDeclarations(const IR::Program& program) {
55 std::set<const IR::Inst*> definitions;
56 for (const IR::Block* const block : program.blocks) {
57 for (const IR::Inst& inst : *block) {
58 definitions.emplace(&inst);
59 if (inst.GetOpcode() == IR::Opcode::Phi) {
60 // Phi nodes can have forward declarations
61 continue;
62 }
63 const size_t num_args{inst.NumArgs()};
64 for (size_t arg = 0; arg < num_args; ++arg) {
65 if (inst.Arg(arg).IsImmediate()) {
66 continue;
67 }
68 if (!definitions.contains(inst.Arg(arg).Inst())) {
69 throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
70 }
71 }
72 }
73 }
74}
75
76static void ValidatePhiNodes(const IR::Program& program) {
77 for (const IR::Block* const block : program.blocks) {
78 bool no_more_phis{false};
79 for (const IR::Inst& inst : *block) {
80 if (inst.GetOpcode() == IR::Opcode::Phi) {
81 if (no_more_phis) {
82 throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
83 }
84 } else {
85 no_more_phis = true;
86 }
87 }
88 }
89}
90
91void VerificationPass(const IR::Program& program) {
92 ValidateTypes(program);
93 ValidateUses(program);
94 ValidateForwardDeclarations(program);
95 ValidatePhiNodes(program);
96}
97
98} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
new file mode 100644
index 000000000..f8b255b66
--- /dev/null
+++ b/src/shader_recompiler/object_pool.h
@@ -0,0 +1,104 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <type_traits>
9#include <utility>
10
11namespace Shader {
12
13template <typename T>
14requires std::is_destructible_v<T> class ObjectPool {
15public:
16 explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
17 node = &chunks.emplace_back(new_chunk_size);
18 }
19
20 template <typename... Args>
21 requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) {
22 return std::construct_at(Memory(), std::forward<Args>(args)...);
23 }
24
25 void ReleaseContents() {
26 if (chunks.empty()) {
27 return;
28 }
29 Chunk& root{chunks.front()};
30 if (root.used_objects == root.num_objects) {
31 // Root chunk has been filled, squash allocations into it
32 const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
33 chunks.clear();
34 chunks.emplace_back(total_objects);
35 } else {
36 root.Release();
37 chunks.resize(1);
38 }
39 chunks.shrink_to_fit();
40 node = &chunks.front();
41 }
42
43private:
44 struct NonTrivialDummy {
45 NonTrivialDummy() noexcept {}
46 };
47
48 union Storage {
49 Storage() noexcept {}
50 ~Storage() noexcept {}
51
52 NonTrivialDummy dummy{};
53 T object;
54 };
55
56 struct Chunk {
57 explicit Chunk() = default;
58 explicit Chunk(size_t size)
59 : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {}
60
61 Chunk& operator=(Chunk&& rhs) noexcept {
62 Release();
63 used_objects = std::exchange(rhs.used_objects, 0);
64 num_objects = std::exchange(rhs.num_objects, 0);
65 storage = std::move(rhs.storage);
66 return *this;
67 }
67
68 Chunk(Chunk&& rhs) noexcept
69 : used_objects{std::exchange(rhs.used_objects, 0)},
70 num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {}
71
72 ~Chunk() {
73 Release();
74 }
75
76 void Release() {
77 std::destroy_n(storage.get(), used_objects);
78 used_objects = 0;
79 }
80
81 size_t used_objects{};
82 size_t num_objects{};
83 std::unique_ptr<Storage[]> storage;
84 };
85
86 [[nodiscard]] T* Memory() {
87 Chunk* const chunk{FreeChunk()};
88 return &chunk->storage[chunk->used_objects++].object;
89 }
90
91 [[nodiscard]] Chunk* FreeChunk() {
92 if (node->used_objects != node->num_objects) {
93 return node;
94 }
95 node = &chunks.emplace_back(new_chunk_size);
96 return node;
97 }
98
99 Chunk* node{};
100 std::vector<Chunk> chunks;
101 size_t new_chunk_size{};
102};
103
104} // namespace Shader
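A minimal usage sketch of the pool (hypothetical example; real callers pool IR instructions and blocks, and std::string here is only for illustration):

    Shader::ObjectPool<std::string> pool{1024};      // chunks of 1024 objects
    std::string* const str{pool.Create("hello")};    // constructed in place inside a chunk
    // ... use *str ...
    pool.ReleaseContents();                          // destroys every object, keeps one chunk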
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
new file mode 100644
index 000000000..f0c3b3b17
--- /dev/null
+++ b/src/shader_recompiler/profile.h
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader {
10
11struct Profile {
12 u32 supported_spirv{0x00010000};
13
14 bool unified_descriptor_binding{};
15 bool support_descriptor_aliasing{};
16 bool support_int8{};
17 bool support_int16{};
18 bool support_int64{};
19 bool support_vertex_instance_id{};
20 bool support_float_controls{};
21 bool support_separate_denorm_behavior{};
22 bool support_separate_rounding_mode{};
23 bool support_fp16_denorm_preserve{};
24 bool support_fp32_denorm_preserve{};
25 bool support_fp16_denorm_flush{};
26 bool support_fp32_denorm_flush{};
27 bool support_fp16_signed_zero_nan_preserve{};
28 bool support_fp32_signed_zero_nan_preserve{};
29 bool support_fp64_signed_zero_nan_preserve{};
30 bool support_explicit_workgroup_layout{};
31 bool support_vote{};
32 bool support_viewport_index_layer_non_geometry{};
33 bool support_viewport_mask{};
34 bool support_typeless_image_loads{};
35 bool support_demote_to_helper_invocation{};
36 bool support_int64_atomics{};
37 bool support_derivative_control{};
38 bool support_geometry_shader_passthrough{};
39 bool support_gl_nv_gpu_shader_5{};
40 bool support_gl_amd_gpu_shader_half_float{};
41 bool support_gl_texture_shadow_lod{};
42 bool support_gl_warp_intrinsics{};
43 bool support_gl_variable_aoffi{};
44 bool support_gl_sparse_textures{};
45 bool support_gl_derivative_control{};
46
47 bool warp_size_potentially_larger_than_guest{};
48
49 bool lower_left_origin_mode{};
50 /// Fragment outputs have to be declared even if they are not written, to avoid undefined values.
51 /// See Ori and the Blind Forest's main menu for reference.
52 bool need_declared_frag_colors{};
53 /// Prevents fast math optimizations that may cause inaccuracies
54 bool need_fastmath_off{};
55
56 /// OpFClamp is broken and OpFMax + OpFMin should be used instead
57 bool has_broken_spirv_clamp{};
58 /// Offset image operands with an unsigned type do not work
59 bool has_broken_unsigned_image_offsets{};
60 /// Signed instructions with unsigned data types are misinterpreted
61 bool has_broken_signed_operations{};
62 /// Float controls break when fp16 is enabled
63 bool has_broken_fp16_float_controls{};
64 /// Dynamic vec4 indexing is broken on some OpenGL drivers
65 bool has_gl_component_indexing_bug{};
66 /// The precise type qualifier is broken in the fragment stage of some drivers
67 bool has_gl_precise_bug{};
68 /// Ignores SPIR-V ordered vs unordered using GLSL semantics
69 bool ignore_nan_fp_comparisons{};
70
71 u32 gl_max_compute_smem_size{};
72};
73
74} // namespace Shader
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h
new file mode 100644
index 000000000..bd6c2bfb5
--- /dev/null
+++ b/src/shader_recompiler/program_header.h
@@ -0,0 +1,219 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Shader {
15
16enum class OutputTopology : u32 {
17 PointList = 1,
18 LineStrip = 6,
19 TriangleStrip = 7,
20};
21
22enum class PixelImap : u8 {
23 Unused = 0,
24 Constant = 1,
25 Perspective = 2,
26 ScreenLinear = 3,
27};
28
29// Documentation in:
30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
31struct ProgramHeader {
32 union {
33 BitField<0, 5, u32> sph_type;
34 BitField<5, 5, u32> version;
35 BitField<10, 4, u32> shader_type;
36 BitField<14, 1, u32> mrt_enable;
37 BitField<15, 1, u32> kills_pixels;
38 BitField<16, 1, u32> does_global_store;
39 BitField<17, 4, u32> sass_version;
40 BitField<21, 2, u32> reserved1;
41 BitField<24, 1, u32> geometry_passthrough;
42 BitField<25, 1, u32> reserved2;
43 BitField<26, 1, u32> does_load_or_store;
44 BitField<27, 1, u32> does_fp64;
45 BitField<28, 4, u32> stream_out_mask;
46 } common0;
47
48 union {
49 BitField<0, 24, u32> shader_local_memory_low_size;
50 BitField<24, 8, u32> per_patch_attribute_count;
51 } common1;
52
53 union {
54 BitField<0, 24, u32> shader_local_memory_high_size;
55 BitField<24, 8, u32> threads_per_input_primitive;
56 } common2;
57
58 union {
59 BitField<0, 24, u32> shader_local_memory_crs_size;
60 BitField<24, 4, OutputTopology> output_topology;
61 BitField<28, 4, u32> reserved;
62 } common3;
63
64 union {
65 BitField<0, 12, u32> max_output_vertices;
66 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
67 BitField<20, 4, u32> reserved;
68 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
69 } common4;
70
71 union {
72 struct {
73 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
74
75 union {
76 BitField<0, 1, u8> primitive_array_id;
77 BitField<1, 1, u8> rt_array_index;
78 BitField<2, 1, u8> viewport_index;
79 BitField<3, 1, u8> point_size;
80 BitField<4, 1, u8> position_x;
81 BitField<5, 1, u8> position_y;
82 BitField<6, 1, u8> position_z;
83 BitField<7, 1, u8> position_w;
84 u8 raw;
85 } imap_systemb;
86
87 std::array<u8, 16> imap_generic_vector;
88
89 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
90 union {
91 BitField<0, 8, u16> clip_distances;
92 BitField<8, 1, u16> point_sprite_s;
93 BitField<9, 1, u16> point_sprite_t;
94 BitField<10, 1, u16> fog_coordinate;
95 BitField<12, 1, u16> tessellation_eval_point_u;
96 BitField<13, 1, u16> tessellation_eval_point_v;
97 BitField<14, 1, u16> instance_id;
98 BitField<15, 1, u16> vertex_id;
99 };
100 INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
101 INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
102 INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
103
104 union {
105 BitField<0, 1, u8> primitive_array_id;
106 BitField<1, 1, u8> rt_array_index;
107 BitField<2, 1, u8> viewport_index;
108 BitField<3, 1, u8> point_size;
109 BitField<4, 1, u8> position_x;
110 BitField<5, 1, u8> position_y;
111 BitField<6, 1, u8> position_z;
112 BitField<7, 1, u8> position_w;
113 u8 raw;
114 } omap_systemb;
115
116 std::array<u8, 16> omap_generic_vector;
117
118 INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
119
120 union {
121 BitField<0, 8, u16> clip_distances;
122 BitField<8, 1, u16> point_sprite_s;
123 BitField<9, 1, u16> point_sprite_t;
124 BitField<10, 1, u16> fog_coordinate;
125 BitField<12, 1, u16> tessellation_eval_point_u;
126 BitField<13, 1, u16> tessellation_eval_point_v;
127 BitField<14, 1, u16> instance_id;
128 BitField<15, 1, u16> vertex_id;
129 } omap_systemc;
130
131 INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
132 INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
133
134 [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept {
135 const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)};
136 return {
137 (data & 1) != 0,
138 (data & 2) != 0,
139 (data & 4) != 0,
140 (data & 8) != 0,
141 };
142 }
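            // Example: InputGeneric(5) reads the upper nibble of imap_generic_vector[2];
            // bit k of that nibble reports whether component k (x, y, z, w) of generic
            // attribute 5 is consumed by this stage.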
143
144 [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept {
145 const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)};
146 return {
147 (data & 1) != 0,
148 (data & 2) != 0,
149 (data & 4) != 0,
150 (data & 8) != 0,
151 };
152 }
153 } vtg;
154
155 struct {
156 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
157
158 union {
159 BitField<0, 1, u8> primitive_array_id;
160 BitField<1, 1, u8> rt_array_index;
161 BitField<2, 1, u8> viewport_index;
162 BitField<3, 1, u8> point_size;
163 BitField<4, 1, u8> position_x;
164 BitField<5, 1, u8> position_y;
165 BitField<6, 1, u8> position_z;
166 BitField<7, 1, u8> position_w;
167 BitField<0, 4, u8> first;
168 BitField<4, 4, u8> position;
169 u8 raw;
170 } imap_systemb;
171
172 union {
173 BitField<0, 2, PixelImap> x;
174 BitField<2, 2, PixelImap> y;
175 BitField<4, 2, PixelImap> z;
176 BitField<6, 2, PixelImap> w;
177 u8 raw;
178 } imap_generic_vector[32];
179
180 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
181 INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
182 INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
183 INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
184
185 struct {
186 u32 target;
187 union {
188 BitField<0, 1, u32> sample_mask;
189 BitField<1, 1, u32> depth;
190 BitField<2, 30, u32> reserved;
191 };
192 } omap;
193
194 [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
195 const u32 bits{omap.target >> (rt * 4)};
196 return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
197 }
198
199 [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
200 const auto& vector{imap_generic_vector[attribute]};
201 return {vector.x, vector.y, vector.z, vector.w};
202 }
203
204 [[nodiscard]] bool IsGenericVectorActive(size_t index) const {
205 return imap_generic_vector[index].raw != 0;
206 }
207 } ps;
208
209 std::array<u32, 0xf> raw;
210 };
211
212 [[nodiscard]] u64 LocalMemorySize() const noexcept {
213 return static_cast<u64>(common1.shader_local_memory_low_size) |
214 (static_cast<u64>(common2.shader_local_memory_high_size) << 24);
215 }
216};
217static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
218
219} // namespace Shader
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
new file mode 100644
index 000000000..f3f83a258
--- /dev/null
+++ b/src/shader_recompiler/runtime_info.h
@@ -0,0 +1,88 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <optional>
10#include <vector>
11
12#include "common/common_types.h"
13#include "shader_recompiler/varying_state.h"
14
15namespace Shader {
16
17enum class AttributeType : u8 {
18 Float,
19 SignedInt,
20 UnsignedInt,
21 Disabled,
22};
23
24enum class InputTopology {
25 Points,
26 Lines,
27 LinesAdjacency,
28 Triangles,
29 TrianglesAdjacency,
30};
31
32enum class CompareFunction {
33 Never,
34 Less,
35 Equal,
36 LessThanEqual,
37 Greater,
38 NotEqual,
39 GreaterThanEqual,
40 Always,
41};
42
43enum class TessPrimitive {
44 Isolines,
45 Triangles,
46 Quads,
47};
48
49enum class TessSpacing {
50 Equal,
51 FractionalOdd,
52 FractionalEven,
53};
54
55struct TransformFeedbackVarying {
56 u32 buffer{};
57 u32 stride{};
58 u32 offset{};
59 u32 components{};
60};
61
62struct RuntimeInfo {
63 std::array<AttributeType, 32> generic_input_types{};
64 VaryingState previous_stage_stores;
65
66 bool convert_depth_mode{};
67 bool force_early_z{};
68
69 TessPrimitive tess_primitive{};
70 TessSpacing tess_spacing{};
71 bool tess_clockwise{};
72
73 InputTopology input_topology{};
74
75 std::optional<float> fixed_state_point_size;
76 std::optional<CompareFunction> alpha_test_func;
77 float alpha_test_reference{};
78
79 /// Static Y negate value
80 bool y_negate{};
81 /// Use storage buffers instead of global pointers on GLASM
82 bool glasm_use_storage_buffers{};
83
84 /// Transform feedback state for each varying
85 std::vector<TransformFeedbackVarying> xfb_varyings;
86};
87
88} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
new file mode 100644
index 000000000..4ef4dbd40
--- /dev/null
+++ b/src/shader_recompiler/shader_info.h
@@ -0,0 +1,193 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9
10#include "common/common_types.h"
11#include "shader_recompiler/frontend/ir/type.h"
12#include "shader_recompiler/varying_state.h"
13
14#include <boost/container/small_vector.hpp>
15#include <boost/container/static_vector.hpp>
16
17namespace Shader {
18
19enum class TextureType : u32 {
20 Color1D,
21 ColorArray1D,
22 Color2D,
23 ColorArray2D,
24 Color3D,
25 ColorCube,
26 ColorArrayCube,
27 Buffer,
28};
29constexpr u32 NUM_TEXTURE_TYPES = 8;
30
31enum class ImageFormat : u32 {
32 Typeless,
33 R8_UINT,
34 R8_SINT,
35 R16_UINT,
36 R16_SINT,
37 R32_UINT,
38 R32G32_UINT,
39 R32G32B32A32_UINT,
40};
41
42enum class Interpolation {
43 Smooth,
44 Flat,
45 NoPerspective,
46};
47
48struct ConstantBufferDescriptor {
49 u32 index;
50 u32 count;
51};
52
53struct StorageBufferDescriptor {
54 u32 cbuf_index;
55 u32 cbuf_offset;
56 u32 count;
57 bool is_written;
58};
59
60struct TextureBufferDescriptor {
61 bool has_secondary;
62 u32 cbuf_index;
63 u32 cbuf_offset;
64 u32 secondary_cbuf_index;
65 u32 secondary_cbuf_offset;
66 u32 count;
67 u32 size_shift;
68};
69using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
70
71struct ImageBufferDescriptor {
72 ImageFormat format;
73 bool is_written;
74 bool is_read;
75 u32 cbuf_index;
76 u32 cbuf_offset;
77 u32 count;
78 u32 size_shift;
79};
80using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
81
82struct TextureDescriptor {
83 TextureType type;
84 bool is_depth;
85 bool has_secondary;
86 u32 cbuf_index;
87 u32 cbuf_offset;
88 u32 secondary_cbuf_index;
89 u32 secondary_cbuf_offset;
90 u32 count;
91 u32 size_shift;
92};
93using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
94
95struct ImageDescriptor {
96 TextureType type;
97 ImageFormat format;
98 bool is_written;
99 bool is_read;
100 u32 cbuf_index;
101 u32 cbuf_offset;
102 u32 count;
103 u32 size_shift;
104};
105using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
106
107struct Info {
108 static constexpr size_t MAX_CBUFS{18};
109 static constexpr size_t MAX_SSBOS{32};
110
111 bool uses_workgroup_id{};
112 bool uses_local_invocation_id{};
113 bool uses_invocation_id{};
114 bool uses_sample_id{};
115 bool uses_is_helper_invocation{};
116 bool uses_subgroup_invocation_id{};
117 bool uses_subgroup_shuffles{};
118 std::array<bool, 30> uses_patches{};
119
120 std::array<Interpolation, 32> interpolation{};
121 VaryingState loads;
122 VaryingState stores;
123 VaryingState passthrough;
124
125 bool loads_indexed_attributes{};
126
127 std::array<bool, 8> stores_frag_color{};
128 bool stores_sample_mask{};
129 bool stores_frag_depth{};
130
131 bool stores_tess_level_outer{};
132 bool stores_tess_level_inner{};
133
134 bool stores_indexed_attributes{};
135
136 bool stores_global_memory{};
137
138 bool uses_fp16{};
139 bool uses_fp64{};
140 bool uses_fp16_denorms_flush{};
141 bool uses_fp16_denorms_preserve{};
142 bool uses_fp32_denorms_flush{};
143 bool uses_fp32_denorms_preserve{};
144 bool uses_int8{};
145 bool uses_int16{};
146 bool uses_int64{};
147 bool uses_image_1d{};
148 bool uses_sampled_1d{};
149 bool uses_sparse_residency{};
150 bool uses_demote_to_helper_invocation{};
151 bool uses_subgroup_vote{};
152 bool uses_subgroup_mask{};
153 bool uses_fswzadd{};
154 bool uses_derivatives{};
155 bool uses_typeless_image_reads{};
156 bool uses_typeless_image_writes{};
157 bool uses_image_buffers{};
158 bool uses_shared_increment{};
159 bool uses_shared_decrement{};
160 bool uses_global_increment{};
161 bool uses_global_decrement{};
162 bool uses_atomic_f32_add{};
163 bool uses_atomic_f16x2_add{};
164 bool uses_atomic_f16x2_min{};
165 bool uses_atomic_f16x2_max{};
166 bool uses_atomic_f32x2_add{};
167 bool uses_atomic_f32x2_min{};
168 bool uses_atomic_f32x2_max{};
169 bool uses_atomic_s32_min{};
170 bool uses_atomic_s32_max{};
171 bool uses_int64_bit_atomics{};
172 bool uses_global_memory{};
173 bool uses_atomic_image_u32{};
174 bool uses_shadow_lod{};
175
176 IR::Type used_constant_buffer_types{};
177 IR::Type used_storage_buffer_types{};
178
179 u32 constant_buffer_mask{};
180 std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
181 u32 nvn_buffer_base{};
182 std::bitset<16> nvn_buffer_used{};
183
184 boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
185 constant_buffer_descriptors;
186 boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
187 TextureBufferDescriptors texture_buffer_descriptors;
188 ImageBufferDescriptors image_buffer_descriptors;
189 TextureDescriptors texture_descriptors;
190 ImageDescriptors image_descriptors;
191};
192
193} // namespace Shader
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 000000000..5c1c8d8fc
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader {
10
11enum class Stage : u32 {
12 VertexB,
13 TessellationControl,
14 TessellationEval,
15 Geometry,
16 Fragment,
17
18 Compute,
19
20 VertexA,
21};
22constexpr u32 MaxStageTypes = 6;
23
24[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
25 return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index);
26}
27
28} // namespace Shader
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h
new file mode 100644
index 000000000..9d7b24a76
--- /dev/null
+++ b/src/shader_recompiler/varying_state.h
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <cstddef>
9
10#include "shader_recompiler/frontend/ir/attribute.h"
11
12namespace Shader {
13
14struct VaryingState {
15 std::bitset<256> mask{};
16
17 void Set(IR::Attribute attribute, bool state = true) {
18 mask[static_cast<size_t>(attribute)] = state;
19 }
20
21 [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept {
22 return mask[static_cast<size_t>(attribute)];
23 }
24
25 [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept {
26 return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] ||
27 mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3];
28 }
29
30 [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept {
31 return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] &&
32 mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3];
33 }
34
35 [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept {
36 return AnyComponent(base) == AllComponents(base);
37 }
38
39 [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept {
40 return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component];
41 }
42
43 [[nodiscard]] bool Generic(size_t index) const noexcept {
44 return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3);
45 }
46
47 [[nodiscard]] bool ClipDistances() const noexcept {
48 return AnyComponent(IR::Attribute::ClipDistance0) ||
49 AnyComponent(IR::Attribute::ClipDistance4);
50 }
51
52 [[nodiscard]] bool Legacy() const noexcept {
53 return AnyComponent(IR::Attribute::ColorFrontDiffuseR) ||
54 AnyComponent(IR::Attribute::ColorFrontSpecularR) ||
55 AnyComponent(IR::Attribute::ColorBackDiffuseR) ||
56 AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture();
57 }
58
59 [[nodiscard]] bool FixedFunctionTexture() const noexcept {
60 for (size_t index = 0; index < 10; ++index) {
61 if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
62 return true;
63 }
64 }
65 return false;
66 }
67};
68
69} // namespace Shader
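A small usage sketch of VaryingState (illustrative only; Generic0X/Generic0Y are assumed to be the usual per-component enumerators from attribute.h):

    Shader::VaryingState stores;
    stores.Set(Shader::IR::Attribute::Generic0X);
    stores.Set(Shader::IR::Attribute::Generic0Y);
    const bool writes_generic0{stores.Generic(0)};                                   // true
    const bool full_vector{stores.AllComponents(Shader::IR::Attribute::Generic0X)};  // false, Z/W unset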