author      lat9nq  2021-07-25 15:31:33 -0400
committer   GitHub  2021-07-25 15:31:33 -0400
commit      09d6cc99435322c5f480eaa2b0967e33f4966ba6 (patch)
tree        72cdf06f6b7d77fdf5826104fea691f3ea450f54
parent      configuration: Use combobox apply template where possible (diff)
parent      Merge pull request #6575 from FernandoS27/new_settings (diff)
download    yuzu-09d6cc99435322c5f480eaa2b0967e33f4966ba6.tar.gz
            yuzu-09d6cc99435322c5f480eaa2b0967e33f4966ba6.tar.xz
            yuzu-09d6cc99435322c5f480eaa2b0967e33f4966ba6.zip
Merge branch 'master' into fullscreen-enum
-rw-r--r--CMakeModules/GenerateSCMRev.cmake67
-rw-r--r--README.md6
-rw-r--r--dist/qt_themes/default/style.qss20
-rw-r--r--dist/qt_themes/qdarkstyle/style.qss21
-rw-r--r--dist/qt_themes/qdarkstyle_midnight_blue/style.qss21
m---------externals/Vulkan-Headers0
m---------externals/sirit0
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/common/CMakeLists.txt66
-rw-r--r--src/common/logging/filter.cpp4
-rw-r--r--src/common/logging/types.h4
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/settings.cpp4
-rw-r--r--src/common/settings.h11
-rw-r--r--src/common/thread_worker.h3
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp3
-rw-r--r--src/core/hle/kernel/k_auto_object.cpp9
-rw-r--r--src/core/hle/kernel/k_auto_object.h12
-rw-r--r--src/core/hle/kernel/k_process.cpp8
-rw-r--r--src/core/hle/kernel/k_server_session.cpp5
-rw-r--r--src/core/hle/kernel/kernel.cpp97
-rw-r--r--src/core/hle/kernel/kernel.h17
-rw-r--r--src/core/hle/kernel/svc.cpp6
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp13
-rw-r--r--src/core/hle/service/hid/controllers/npad.h8
-rw-r--r--src/core/hle/service/hid/hid.cpp14
-rw-r--r--src/core/hle/service/hid/hid.h13
-rw-r--r--src/core/hle/service/kernel_helpers.cpp62
-rw-r--r--src/core/hle/service/kernel_helpers.h35
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp11
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h3
-rw-r--r--src/core/hle/service/service.cpp11
-rw-r--r--src/core/hle/service/service.h7
-rw-r--r--src/core/hle/service/sm/sm.cpp65
-rw-r--r--src/core/hle/service/sm/sm.h14
-rw-r--r--src/core/reporter.cpp1
-rw-r--r--src/core/telemetry_session.cpp4
-rw-r--r--src/shader_recompiler/CMakeLists.txt268
-rw-r--r--src/shader_recompiler/backend/bindings.h19
-rw-r--r--src/shader_recompiler/backend/glasm/emit_context.cpp154
-rw-r--r--src/shader_recompiler/backend/glasm/emit_context.h80
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.cpp492
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.h25
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp91
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp244
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp346
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp231
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp414
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_image.cpp850
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_instructions.h625
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp294
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp568
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp273
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_select.cpp67
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp58
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_special.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp150
-rw-r--r--src/shader_recompiler/backend/glasm/reg_alloc.cpp186
-rw-r--r--src/shader_recompiler/backend/glasm/reg_alloc.h303
-rw-r--r--src/shader_recompiler/backend/glsl/emit_context.cpp715
-rw-r--r--src/shader_recompiler/backend/glsl/emit_context.h174
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl.cpp252
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl.h24
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp418
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp21
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp94
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp219
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp456
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp21
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp230
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp456
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_image.cpp799
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_instructions.h702
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp253
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp28
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp202
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp105
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_select.cpp55
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp79
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_special.cpp111
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp32
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp217
-rw-r--r--src/shader_recompiler/backend/glsl/var_alloc.cpp308
-rw-r--r--src/shader_recompiler/backend/glsl/var_alloc.h105
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp1368
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h307
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp541
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h27
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp448
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp38
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp66
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp155
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp505
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp28
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp269
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp396
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image.cpp462
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp183
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_instructions.h579
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp270
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp26
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp275
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_select.cpp42
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp174
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_special.cpp150
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp30
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp203
-rw-r--r--src/shader_recompiler/environment.h53
-rw-r--r--src/shader_recompiler/exception.h66
-rw-r--r--src/shader_recompiler/frontend/ir/abstract_syntax_list.h58
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.cpp454
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.h250
-rw-r--r--src/shader_recompiler/frontend/ir/basic_block.cpp149
-rw-r--r--src/shader_recompiler/frontend/ir/basic_block.h185
-rw-r--r--src/shader_recompiler/frontend/ir/breadth_first_search.h56
-rw-r--r--src/shader_recompiler/frontend/ir/condition.cpp29
-rw-r--r--src/shader_recompiler/frontend/ir/condition.h60
-rw-r--r--src/shader_recompiler/frontend/ir/flow_test.cpp83
-rw-r--r--src/shader_recompiler/frontend/ir/flow_test.h62
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp2017
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h413
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.cpp411
-rw-r--r--src/shader_recompiler/frontend/ir/modifiers.h49
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.cpp15
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.h110
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc550
-rw-r--r--src/shader_recompiler/frontend/ir/patch.cpp28
-rw-r--r--src/shader_recompiler/frontend/ir/patch.h149
-rw-r--r--src/shader_recompiler/frontend/ir/post_order.cpp46
-rw-r--r--src/shader_recompiler/frontend/ir/post_order.h14
-rw-r--r--src/shader_recompiler/frontend/ir/pred.h44
-rw-r--r--src/shader_recompiler/frontend/ir/program.cpp32
-rw-r--r--src/shader_recompiler/frontend/ir/program.h35
-rw-r--r--src/shader_recompiler/frontend/ir/reg.h332
-rw-r--r--src/shader_recompiler/frontend/ir/type.cpp38
-rw-r--r--src/shader_recompiler/frontend/ir/type.h61
-rw-r--r--src/shader_recompiler/frontend/ir/value.cpp99
-rw-r--r--src/shader_recompiler/frontend/ir/value.h398
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.cpp642
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.h169
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.cpp149
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/instruction.h63
-rw-r--r--src/shader_recompiler/frontend/maxwell/location.h112
-rw-r--r--src/shader_recompiler/frontend/maxwell/maxwell.inc286
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.cpp26
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.h30
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp883
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.h20
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp35
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp96
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp74
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h57
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp153
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp72
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp50
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp43
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp47
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp78
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp253
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp94
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp60
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp125
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp169
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h42
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp143
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp117
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp118
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp272
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h387
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp105
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp48
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp80
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp86
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp135
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp126
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h39
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp196
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp218
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp116
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp181
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp283
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp45
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp46
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp38
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp205
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp281
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp236
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp266
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp208
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp134
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp165
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp242
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp131
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp76
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp30
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h23
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp69
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.cpp52
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp223
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.h23
-rw-r--r--src/shader_recompiler/host_translate_info.h18
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp928
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp610
-rw-r--r--src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp26
-rw-r--r--src/shader_recompiler/ir_opt/dual_vertex_pass.cpp30
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp526
-rw-r--r--src/shader_recompiler/ir_opt/identity_removal_pass.cpp38
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp143
-rw-r--r--src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp218
-rw-r--r--src/shader_recompiler/ir_opt/passes.h32
-rw-r--r--src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp383
-rw-r--r--src/shader_recompiler/ir_opt/texture_pass.cpp523
-rw-r--r--src/shader_recompiler/ir_opt/verification_pass.cpp98
-rw-r--r--src/shader_recompiler/object_pool.h104
-rw-r--r--src/shader_recompiler/profile.h74
-rw-r--r--src/shader_recompiler/program_header.h219
-rw-r--r--src/shader_recompiler/runtime_info.h88
-rw-r--r--src/shader_recompiler/shader_info.h193
-rw-r--r--src/shader_recompiler/stage.h28
-rw-r--r--src/shader_recompiler/varying_state.h69
-rw-r--r--src/tests/common/unique_function.cpp2
-rw-r--r--src/video_core/CMakeLists.txt80
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h277
-rw-r--r--src/video_core/dirty_flags.cpp6
-rw-r--r--src/video_core/dirty_flags.h2
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h103
-rw-r--r--src/video_core/engines/kepler_compute.cpp45
-rw-r--r--src/video_core/engines/kepler_compute.h21
-rw-r--r--src/video_core/engines/maxwell_3d.cpp39
-rw-r--r--src/video_core/engines/maxwell_3d.h46
-rw-r--r--src/video_core/engines/maxwell_dma.cpp3
-rw-r--r--src/video_core/engines/shader_bytecode.h2298
-rw-r--r--src/video_core/engines/shader_header.h158
-rw-r--r--src/video_core/engines/shader_type.h21
-rw-r--r--src/video_core/guest_driver.cpp37
-rw-r--r--src/video_core/guest_driver.h46
-rw-r--r--src/video_core/memory_manager.cpp1
-rw-r--r--src/video_core/rasterizer_interface.h16
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2124
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp86
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h57
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp209
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h93
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_device.h84
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp572
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h169
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp448
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h44
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp994
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h172
-rw-r--r--src/video_core/renderer_opengl/gl_shader_context.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2986
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp482
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h176
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h185
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp123
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h89
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp361
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h51
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h108
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp60
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h6
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp23
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp40
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h2
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp92
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h79
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp54
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h7
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h154
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp94
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp68
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h22
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp270
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp296
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h72
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp172
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.h70
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp839
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h145
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp867
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h176
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp475
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h59
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.cpp96
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.h55
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp172
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h38
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp3166
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h99
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp56
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp59
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h31
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp243
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h80
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h4
-rw-r--r--src/video_core/shader/ast.cpp752
-rw-r--r--src/video_core/shader/ast.h398
-rw-r--r--src/video_core/shader/async_shaders.cpp234
-rw-r--r--src/video_core/shader/async_shaders.h138
-rw-r--r--src/video_core/shader/compiler_settings.cpp26
-rw-r--r--src/video_core/shader/compiler_settings.h26
-rw-r--r--src/video_core/shader/control_flow.cpp751
-rw-r--r--src/video_core/shader/control_flow.h117
-rw-r--r--src/video_core/shader/decode.cpp368
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp166
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp101
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp54
-rw-r--r--src/video_core/shader/decode/arithmetic_immediate.cpp53
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp375
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp99
-rw-r--r--src/video_core/shader/decode/bfe.cpp77
-rw-r--r--src/video_core/shader/decode/bfi.cpp45
-rw-r--r--src/video_core/shader/decode/conversion.cpp321
-rw-r--r--src/video_core/shader/decode/ffma.cpp62
-rw-r--r--src/video_core/shader/decode/float_set.cpp58
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp57
-rw-r--r--src/video_core/shader/decode/half_set.cpp115
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp80
-rw-r--r--src/video_core/shader/decode/hfma2.cpp73
-rw-r--r--src/video_core/shader/decode/image.cpp536
-rw-r--r--src/video_core/shader/decode/integer_set.cpp49
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp53
-rw-r--r--src/video_core/shader/decode/memory.cpp493
-rw-r--r--src/video_core/shader/decode/other.cpp322
-rw-r--r--src/video_core/shader/decode/predicate_set_predicate.cpp68
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp46
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp86
-rw-r--r--src/video_core/shader/decode/shift.cpp153
-rw-r--r--src/video_core/shader/decode/texture.cpp935
-rw-r--r--src/video_core/shader/decode/video.cpp169
-rw-r--r--src/video_core/shader/decode/warp.cpp117
-rw-r--r--src/video_core/shader/decode/xmad.cpp156
-rw-r--r--src/video_core/shader/expr.cpp93
-rw-r--r--src/video_core/shader/expr.h156
-rw-r--r--src/video_core/shader/memory_util.cpp76
-rw-r--r--src/video_core/shader/memory_util.h43
-rw-r--r--src/video_core/shader/node.h701
-rw-r--r--src/video_core/shader/node_helper.cpp115
-rw-r--r--src/video_core/shader/node_helper.h71
-rw-r--r--src/video_core/shader/registry.cpp181
-rw-r--r--src/video_core/shader/registry.h172
-rw-r--r--src/video_core/shader/shader_ir.cpp464
-rw-r--r--src/video_core/shader/shader_ir.h479
-rw-r--r--src/video_core/shader/track.cpp236
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
-rw-r--r--src/video_core/shader_cache.cpp250
-rw-r--r--src/video_core/shader_cache.h215
-rw-r--r--src/video_core/shader_environment.cpp460
-rw-r--r--src/video_core/shader_environment.h183
-rw-r--r--src/video_core/shader_notify.cpp51
-rw-r--r--src/video_core/shader_notify.h28
-rw-r--r--src/video_core/texture_cache/formatter.cpp4
-rw-r--r--src/video_core/texture_cache/formatter.h3
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp9
-rw-r--r--src/video_core/texture_cache/image_view_base.h1
-rw-r--r--src/video_core/texture_cache/texture_cache.h35
-rw-r--r--src/video_core/textures/texture.h9
-rw-r--r--src/video_core/transform_feedback.cpp99
-rw-r--r--src/video_core/transform_feedback.h30
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.cpp7
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.h21
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp362
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h161
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp5
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h44
-rw-r--r--src/yuzu/bootmanager.cpp13
-rw-r--r--src/yuzu/configuration/config.cpp11
-rw-r--r--src/yuzu/configuration/config.h3
-rw-r--r--src/yuzu/configuration/configure_debug.cpp8
-rw-r--r--src/yuzu/configuration/configure_debug.ui26
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp76
-rw-r--r--src/yuzu/configuration/configure_graphics.h4
-rw-r--r--src/yuzu/configuration/configure_graphics.ui118
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp7
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui20
-rw-r--r--src/yuzu/game_list.cpp14
-rw-r--r--src/yuzu/game_list.h4
-rw-r--r--src/yuzu/main.cpp181
-rw-r--r--src/yuzu/main.h7
-rw-r--r--src/yuzu_cmd/config.cpp4
-rw-r--r--src/yuzu_cmd/default_ini.h15
-rw-r--r--src/yuzu_cmd/yuzu.cpp8
453 files changed, 49784 insertions, 27358 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 311ba1c2e..43ca730ec 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -48,69 +48,6 @@ if (BUILD_REPOSITORY)
48 endif() 48 endif()
49endif() 49endif()
50 50
-# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
-set(VIDEO_CORE "${SRC_DIR}/src/video_core")
+# The variable SRC_DIR must be passed into the script
+# (since it uses the current build directory for all values of CMAKE_*_DIR)
53set(HASH_FILES
54 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
56 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
57 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
58 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
59 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
60 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
61 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
62 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
63 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
64 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
65 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
66 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
67 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
68 "${VIDEO_CORE}/shader/decode/bfe.cpp"
69 "${VIDEO_CORE}/shader/decode/bfi.cpp"
70 "${VIDEO_CORE}/shader/decode/conversion.cpp"
71 "${VIDEO_CORE}/shader/decode/ffma.cpp"
72 "${VIDEO_CORE}/shader/decode/float_set.cpp"
73 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
74 "${VIDEO_CORE}/shader/decode/half_set.cpp"
75 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
76 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
77 "${VIDEO_CORE}/shader/decode/image.cpp"
78 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
79 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
80 "${VIDEO_CORE}/shader/decode/memory.cpp"
81 "${VIDEO_CORE}/shader/decode/texture.cpp"
82 "${VIDEO_CORE}/shader/decode/other.cpp"
83 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
84 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
85 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
86 "${VIDEO_CORE}/shader/decode/shift.cpp"
87 "${VIDEO_CORE}/shader/decode/video.cpp"
88 "${VIDEO_CORE}/shader/decode/warp.cpp"
89 "${VIDEO_CORE}/shader/decode/xmad.cpp"
90 "${VIDEO_CORE}/shader/ast.cpp"
91 "${VIDEO_CORE}/shader/ast.h"
92 "${VIDEO_CORE}/shader/compiler_settings.cpp"
93 "${VIDEO_CORE}/shader/compiler_settings.h"
94 "${VIDEO_CORE}/shader/control_flow.cpp"
95 "${VIDEO_CORE}/shader/control_flow.h"
96 "${VIDEO_CORE}/shader/decode.cpp"
97 "${VIDEO_CORE}/shader/expr.cpp"
98 "${VIDEO_CORE}/shader/expr.h"
99 "${VIDEO_CORE}/shader/node.h"
100 "${VIDEO_CORE}/shader/node_helper.cpp"
101 "${VIDEO_CORE}/shader/node_helper.h"
102 "${VIDEO_CORE}/shader/registry.cpp"
103 "${VIDEO_CORE}/shader/registry.h"
104 "${VIDEO_CORE}/shader/shader_ir.cpp"
105 "${VIDEO_CORE}/shader/shader_ir.h"
106 "${VIDEO_CORE}/shader/track.cpp"
107 "${VIDEO_CORE}/shader/transform_feedback.cpp"
108 "${VIDEO_CORE}/shader/transform_feedback.h"
109)
110set(COMBINED "")
111foreach (F IN LISTS HASH_FILES)
112 file(READ ${F} TMP)
113 set(COMBINED "${COMBINED}${TMP}")
114endforeach()
115string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
116configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY) 53configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
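
The block removed above derived SHADER_CACHE_VERSION by concatenating the listed shader sources and running CMake's string(MD5 ...) over the result, so the precompiled shader cache was invalidated whenever any of those files changed; the merge drops it along with the old shader decompilers it guarded. A minimal C++ sketch of the underlying idea (hash the contents of a file list to obtain a cache key), using FNV-1a instead of MD5 purely to stay dependency-free; the file names are placeholders, not the real list:

    #include <cstdint>
    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // FNV-1a over the concatenated file contents; stands in for the MD5 the CMake script used.
    std::uint64_t HashFiles(const std::vector<std::string>& paths) {
        std::uint64_t hash = 0xcbf29ce484222325ULL;
        for (const auto& path : paths) {
            std::ifstream file(path, std::ios::binary);
            std::ostringstream contents;
            contents << file.rdbuf();
            for (unsigned char c : contents.str()) {
                hash ^= c;
                hash *= 0x100000001b3ULL;
            }
        }
        return hash;
    }

    int main() {
        // Placeholder list; the real script enumerated the video_core shader sources.
        const std::vector<std::string> files{"shader_ir.cpp", "control_flow.cpp"};
        std::cout << std::hex << HashFiles(files) << '\n';
    }
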
diff --git a/README.md b/README.md
index a8821126f..2cb030aed 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ It is written in C++ with portability in mind, and we actively maintain builds f
35 35
36The emulator is capable of running most commercial games at full speed, provided you meet the [necessary hardware requirements](https://yuzu-emu.org/help/quickstart/#hardware-requirements). 36The emulator is capable of running most commercial games at full speed, provided you meet the [necessary hardware requirements](https://yuzu-emu.org/help/quickstart/#hardware-requirements).
37 37
38For a full list of games yuzu support, please visit our [Compatibility page](https://yuzu-emu.org/game/) 38For a full list of games yuzu support, please visit our [Compatibility page](https://yuzu-emu.org/game/)
39 39
40Check out our [website](https://yuzu-emu.org/) for the latest news on exciting features, monthly progress reports, and more! 40Check out our [website](https://yuzu-emu.org/) for the latest news on exciting features, monthly progress reports, and more!
41 41
@@ -43,7 +43,7 @@ Check out our [website](https://yuzu-emu.org/) for the latest news on exciting f
43 43
44Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. For development discussion, please join us on [Discord](https://discord.com/invite/u77vRWY). 44Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. For development discussion, please join us on [Discord](https://discord.com/invite/u77vRWY).
45 45
46If you want to contribute, please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). 46If you want to contribute, please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information).
47You can also contact any of the developers on Discord in order to know about the current state of the emulator. 47You can also contact any of the developers on Discord in order to know about the current state of the emulator.
48 48
49If you want to contribute to the user interface translation project, please check out the [yuzu project on transifex](https://www.transifex.com/yuzu-emulator/yuzu). We centralize translation work there, and periodically upstream translations. 49If you want to contribute to the user interface translation project, please check out the [yuzu project on transifex](https://www.transifex.com/yuzu-emulator/yuzu). We centralize translation work there, and periodically upstream translations.
@@ -78,3 +78,5 @@ If you wish to support us a different way, please join our [Discord](https://dis
78## License 78## License
79 79
80yuzu is licensed under the GPLv2 (or any later version). Refer to the [license.txt](https://github.com/yuzu-emu/yuzu/blob/master/license.txt) file. 80yuzu is licensed under the GPLv2 (or any later version). Refer to the [license.txt](https://github.com/yuzu-emu/yuzu/blob/master/license.txt) file.
81
82The [Skyline-Emulator Team](https://github.com/skyline-emu/skyline) is exempt from GPLv2 for the contributions from all these contributors [FernandoS27](https://github.com/FernandoS27), [lioncash](https://github.com/lioncash), [bunnei](https://github.com/bunnei), [ReinUsesLisp](https://github.com/ReinUsesLisp), [Morph1984](https://github.com/Morph1984), [ogniK5377](https://github.com/ogniK5377), [german77](https://github.com/german77), [ameerj](https://github.com/ameerj), [Kelebek1](https://github.com/Kelebek1) and [lat9nq](https://github.com/lat9nq). They may only use the code from these contributors under Mozilla Public License, version 2.0.
diff --git a/dist/qt_themes/default/style.qss b/dist/qt_themes/default/style.qss
index cee219374..9915a40ba 100644
--- a/dist/qt_themes/default/style.qss
+++ b/dist/qt_themes/default/style.qss
@@ -38,6 +38,26 @@ QPushButton#RendererStatusBarButton:!checked {
38 color: #0066ff; 38 color: #0066ff;
39} 39}
40 40
41QPushButton#GPUStatusBarButton {
42 color: #656565;
43 border: 1px solid transparent;
44 background-color: transparent;
45 padding: 0px 3px 0px 3px;
46 text-align: center;
47}
48
49QPushButton#GPUStatusBarButton:hover {
50 border: 1px solid #76797C;
51}
52
53QPushButton#GPUStatusBarButton:checked {
54 color: #ff8040;
55}
56
57QPushButton#GPUStatusBarButton:!checked {
58 color: #40dd40;
59}
60
41QPushButton#buttonRefreshDevices { 61QPushButton#buttonRefreshDevices {
42 min-width: 21px; 62 min-width: 21px;
43 min-height: 21px; 63 min-height: 21px;
diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss
index 3d0ccbb9e..dac2dba86 100644
--- a/dist/qt_themes/qdarkstyle/style.qss
+++ b/dist/qt_themes/qdarkstyle/style.qss
@@ -1283,6 +1283,27 @@ QPushButton#RendererStatusBarButton:!checked {
1283 color: #00ccdd; 1283 color: #00ccdd;
1284} 1284}
1285 1285
1286QPushButton#GPUStatusBarButton {
1287 min-width: 0px;
1288 color: #656565;
1289 border: 1px solid transparent;
1290 background-color: transparent;
1291 padding: 0px 3px 0px 3px;
1292 text-align: center;
1293}
1294
1295QPushButton#GPUStatusBarButton:hover {
1296 border: 1px solid #76797C;
1297}
1298
1299QPushButton#GPUStatusBarButton:checked {
1300 color: #ff8040;
1301}
1302
1303QPushButton#GPUStatusBarButton:!checked {
1304 color: #40dd40;
1305}
1306
1286QPushButton#buttonRefreshDevices { 1307QPushButton#buttonRefreshDevices {
1287 min-width: 23px; 1308 min-width: 23px;
1288 min-height: 23px; 1309 min-height: 23px;
diff --git a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
index 51bec2fd7..032d05ec6 100644
--- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
+++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
@@ -2186,6 +2186,27 @@ QPushButton#RendererStatusBarButton:!checked {
2186 color: #00ccdd; 2186 color: #00ccdd;
2187} 2187}
2188 2188
2189QPushButton#GPUStatusBarButton {
2190 min-width: 0px;
2191 color: #656565;
2192 border: 1px solid transparent;
2193 background-color: transparent;
2194 padding: 0px 3px 0px 3px;
2195 text-align: center;
2196}
2197
2198QPushButton#GPUStatusBarButton:hover {
2199 border: 1px solid #76797C;
2200}
2201
2202QPushButton#GPUStatusBarButton:checked {
2203 color: #ff8040;
2204}
2205
2206QPushButton#GPUStatusBarButton:!checked {
2207 color: #40dd40;
2208}
2209
2189QPushButton#buttonRefreshDevices { 2210QPushButton#buttonRefreshDevices {
2190 min-width: 19px; 2211 min-width: 19px;
2191 min-height: 19px; 2212 min-height: 19px;
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
-Subproject 8188e3fbbc105591064093440f88081fb957d4f
+Subproject 07c4a37bcf41ea50aef6e98236abdfe8089fb4c
diff --git a/externals/sirit b/externals/sirit
-Subproject eefca56afd49379bdebc97ded8b480839f93088
+Subproject a39596358a3a5488c06554c0c15184a6af71e43
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f8ec8fea8..6e66dc1df 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -142,6 +142,7 @@ add_subdirectory(core)
142add_subdirectory(audio_core) 142add_subdirectory(audio_core)
143add_subdirectory(video_core) 143add_subdirectory(video_core)
144add_subdirectory(input_common) 144add_subdirectory(input_common)
145add_subdirectory(shader_recompiler)
145add_subdirectory(tests) 146add_subdirectory(tests)
146 147
147if (ENABLE_SDL2) 148if (ENABLE_SDL2)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e03fffd8d..57922b51c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,8 +1,3 @@
1# Add a custom command to generate a new shader_cache_version hash when any of the following files change
2# NOTE: This is an approximation of what files affect shader generation, its possible something else
3# could affect the result, but much more unlikely than the following files. Keeping a list of files
4# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
5set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
6if (DEFINED ENV{AZURECIREPO}) 1if (DEFINED ENV{AZURECIREPO})
7 set(BUILD_REPOSITORY $ENV{AZURECIREPO}) 2 set(BUILD_REPOSITORY $ENV{AZURECIREPO})
8endif() 3endif()
@@ -30,64 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp
30 -DGIT_EXECUTABLE=${GIT_EXECUTABLE} 25 -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
31 -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake 26 -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake
32 DEPENDS 27 DEPENDS
-        # WARNING! It was too much work to try and make a common location for this list,
+        # Check that the scm_rev files haven't changed
34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
35 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
37 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
38 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
39 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
40 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
41 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
42 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
43 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
44 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
45 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
46 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
47 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
48 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
49 "${VIDEO_CORE}/shader/decode/bfe.cpp"
50 "${VIDEO_CORE}/shader/decode/bfi.cpp"
51 "${VIDEO_CORE}/shader/decode/conversion.cpp"
52 "${VIDEO_CORE}/shader/decode/ffma.cpp"
53 "${VIDEO_CORE}/shader/decode/float_set.cpp"
54 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
55 "${VIDEO_CORE}/shader/decode/half_set.cpp"
56 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
57 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
58 "${VIDEO_CORE}/shader/decode/image.cpp"
59 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
60 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
61 "${VIDEO_CORE}/shader/decode/memory.cpp"
62 "${VIDEO_CORE}/shader/decode/texture.cpp"
63 "${VIDEO_CORE}/shader/decode/other.cpp"
64 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
65 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
66 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
67 "${VIDEO_CORE}/shader/decode/shift.cpp"
68 "${VIDEO_CORE}/shader/decode/video.cpp"
69 "${VIDEO_CORE}/shader/decode/warp.cpp"
70 "${VIDEO_CORE}/shader/decode/xmad.cpp"
71 "${VIDEO_CORE}/shader/ast.cpp"
72 "${VIDEO_CORE}/shader/ast.h"
73 "${VIDEO_CORE}/shader/compiler_settings.cpp"
74 "${VIDEO_CORE}/shader/compiler_settings.h"
75 "${VIDEO_CORE}/shader/control_flow.cpp"
76 "${VIDEO_CORE}/shader/control_flow.h"
77 "${VIDEO_CORE}/shader/decode.cpp"
78 "${VIDEO_CORE}/shader/expr.cpp"
79 "${VIDEO_CORE}/shader/expr.h"
80 "${VIDEO_CORE}/shader/node.h"
81 "${VIDEO_CORE}/shader/node_helper.cpp"
82 "${VIDEO_CORE}/shader/node_helper.h"
83 "${VIDEO_CORE}/shader/registry.cpp"
84 "${VIDEO_CORE}/shader/registry.h"
85 "${VIDEO_CORE}/shader/shader_ir.cpp"
86 "${VIDEO_CORE}/shader/shader_ir.h"
87 "${VIDEO_CORE}/shader/track.cpp"
88 "${VIDEO_CORE}/shader/transform_feedback.cpp"
89 "${VIDEO_CORE}/shader/transform_feedback.h"
90 # and also check that the scm_rev files haven't changed
91 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" 29 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
92 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" 30 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
93 # technically we should regenerate if the git version changed, but its not worth the effort imo 31 # technically we should regenerate if the git version changed, but its not worth the effort imo
@@ -231,7 +169,7 @@ endif()
231 169
232create_target_directory_groups(common) 170create_target_directory_groups(common)
233 171
-target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
+target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile Threads::Threads)
235target_link_libraries(common PRIVATE lz4::lz4 xbyak) 173target_link_libraries(common PRIVATE lz4::lz4 xbyak)
236if (MSVC) 174if (MSVC)
237 target_link_libraries(common PRIVATE zstd::zstd) 175 target_link_libraries(common PRIVATE zstd::zstd)
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 4f2cc29e1..f055f0e11 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
144 SUB(Render, Software) \ 144 SUB(Render, Software) \
145 SUB(Render, OpenGL) \ 145 SUB(Render, OpenGL) \
146 SUB(Render, Vulkan) \ 146 SUB(Render, Vulkan) \
147 CLS(Shader) \
148 SUB(Shader, SPIRV) \
149 SUB(Shader, GLASM) \
150 SUB(Shader, GLSL) \
147 CLS(Audio) \ 151 CLS(Audio) \
148 SUB(Audio, DSP) \ 152 SUB(Audio, DSP) \
149 SUB(Audio, Sink) \ 153 SUB(Audio, Sink) \
diff --git a/src/common/logging/types.h b/src/common/logging/types.h
index 88b0e9c01..7ad0334fc 100644
--- a/src/common/logging/types.h
+++ b/src/common/logging/types.h
@@ -114,6 +114,10 @@ enum class Class : u8 {
114 Render_Software, ///< Software renderer backend 114 Render_Software, ///< Software renderer backend
115 Render_OpenGL, ///< OpenGL backend 115 Render_OpenGL, ///< OpenGL backend
116 Render_Vulkan, ///< Vulkan backend 116 Render_Vulkan, ///< Vulkan backend
117 Shader, ///< Shader recompiler
118 Shader_SPIRV, ///< Shader SPIR-V code generation
119 Shader_GLASM, ///< Shader GLASM code generation
120 Shader_GLSL, ///< Shader GLSL code generation
117 Audio, ///< Audio emulation 121 Audio, ///< Audio emulation
118 Audio_DSP, ///< The HLE implementation of the DSP 122 Audio_DSP, ///< The HLE implementation of the DSP
119 Audio_Sink, ///< Emulator audio output backend 123 Audio_Sink, ///< Emulator audio output backend
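
The four new Shader classes let messages from the new shader recompiler be filtered independently; the filter.cpp hunk above wires them into the "Shader.SPIRV" / "Shader.GLASM" / "Shader.GLSL" rule names. A self-contained sketch of how a per-class minimum-level filter of this shape typically works, with simplified stand-in names rather than yuzu's actual types:

    #include <array>
    #include <cstddef>
    #include <iostream>

    // Simplified stand-ins for the Class and Level enums extended above.
    enum class Class : std::size_t { Render_Vulkan, Shader, Shader_SPIRV, Shader_GLASM, Shader_GLSL, Count };
    enum class Level : int { Trace, Debug, Info, Warning, Error, Critical };

    // A per-class minimum level is the core of the log filter.
    class Filter {
    public:
        void SetClassLevel(Class cls, Level level) {
            levels[static_cast<std::size_t>(cls)] = level;
        }
        bool CheckMessage(Class cls, Level level) const {
            return static_cast<int>(level) >= static_cast<int>(levels[static_cast<std::size_t>(cls)]);
        }

    private:
        std::array<Level, static_cast<std::size_t>(Class::Count)> levels{}; // defaults to Trace
    };

    int main() {
        Filter filter;
        filter.SetClassLevel(Class::Shader_SPIRV, Level::Warning); // e.g. a "Shader.SPIRV:Warning" rule
        std::cout << filter.CheckMessage(Class::Shader_SPIRV, Level::Info) << '\n';  // 0: filtered out
        std::cout << filter.CheckMessage(Class::Shader_SPIRV, Level::Error) << '\n'; // 1: passes
    }
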
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 5f126f324..cc88994c6 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -14,7 +14,6 @@
14#define BUILD_ID "@BUILD_ID@" 14#define BUILD_ID "@BUILD_ID@"
15#define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" 15#define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@"
16#define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" 16#define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@"
17#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
18 17
19namespace Common { 18namespace Common {
20 19
@@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION;
28const char g_build_id[] = BUILD_ID; 27const char g_build_id[] = BUILD_ID;
29const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; 28const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE;
30const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; 29const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING;
31const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
32 30
33} // namespace 31} // namespace
34 32
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index bf5514386..66268ea0f 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -57,7 +57,7 @@ void LogSettings() {
57 log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); 57 log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
58 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); 58 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
59 log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); 59 log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
60 log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); 60 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
61 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); 61 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
62 log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); 62 log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
63 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); 63 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
@@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) {
140 values.use_nvdec_emulation.SetGlobal(true); 140 values.use_nvdec_emulation.SetGlobal(true);
141 values.accelerate_astc.SetGlobal(true); 141 values.accelerate_astc.SetGlobal(true);
142 values.use_vsync.SetGlobal(true); 142 values.use_vsync.SetGlobal(true);
-    values.use_assembly_shaders.SetGlobal(true);
+    values.shader_backend.SetGlobal(true);
144 values.use_asynchronous_shaders.SetGlobal(true); 144 values.use_asynchronous_shaders.SetGlobal(true);
145 values.use_fast_gpu_time.SetGlobal(true); 145 values.use_fast_gpu_time.SetGlobal(true);
146 values.use_caches_gc.SetGlobal(true); 146 values.use_caches_gc.SetGlobal(true);
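
RestoreGlobalState switches every per-game override back to the global value, which is why the rename from use_assembly_shaders to shader_backend has to touch it. A rough, self-contained sketch of the global/custom mechanism implied by SetGlobal and GetValue; this is simplified, and yuzu's real Setting/BasicSetting templates differ in detail:

    #include <iostream>
    #include <string>

    // Simplified model of a setting that can carry a per-game override on top of a global value.
    template <typename T>
    class Setting {
    public:
        Setting(const T& default_value, std::string label_) : global{default_value}, label{std::move(label_)} {}

        void SetGlobal(bool is_global) { use_global = is_global; }
        void SetValue(const T& value) { (use_global ? global : custom) = value; }
        const T& GetValue(bool need_global = false) const {
            return (use_global || need_global) ? global : custom;
        }
        const std::string& Label() const { return label; }

    private:
        bool use_global = true;
        T global{};
        T custom{};
        std::string label; // the ini key associated with the setting
    };

    int main() {
        Setting<bool> use_vsync{true, "use_vsync"};
        use_vsync.SetGlobal(false); // a per-game profile takes over
        use_vsync.SetValue(false);
        std::cout << use_vsync.Label() << " = " << use_vsync.GetValue() << '\n'; // 0: per-game override
        use_vsync.SetGlobal(true);  // RestoreGlobalState() does this for every such setting
        std::cout << use_vsync.Label() << " = " << use_vsync.GetValue() << '\n'; // 1: back to the global value
    }
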
diff --git a/src/common/settings.h b/src/common/settings.h
index fd2a263ec..801bed603 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -24,6 +24,12 @@ enum class RendererBackend : u32 {
24 Vulkan = 1, 24 Vulkan = 1,
25}; 25};
26 26
27enum class ShaderBackend : u32 {
28 GLSL = 0,
29 GLASM = 1,
30 SPIRV = 2,
31};
32
27enum class GPUAccuracy : u32 { 33enum class GPUAccuracy : u32 {
28 Normal = 0, 34 Normal = 0,
29 High = 1, 35 High = 1,
@@ -313,6 +319,9 @@ struct Values {
313 // Renderer 319 // Renderer
314 Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"}; 320 Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"};
315 BasicSetting<bool> renderer_debug{false, "debug"}; 321 BasicSetting<bool> renderer_debug{false, "debug"};
322 BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
323 BasicSetting<bool> disable_shader_loop_safety_checks{false,
324 "disable_shader_loop_safety_checks"};
316 Setting<int> vulkan_device{0, "vulkan_device"}; 325 Setting<int> vulkan_device{0, "vulkan_device"};
317 326
318 Setting<u16> resolution_factor{1, "resolution_factor"}; 327 Setting<u16> resolution_factor{1, "resolution_factor"};
@@ -336,7 +345,7 @@ struct Values {
336 Setting<bool> accelerate_astc{true, "accelerate_astc"}; 345 Setting<bool> accelerate_astc{true, "accelerate_astc"};
337 Setting<bool> use_vsync{true, "use_vsync"}; 346 Setting<bool> use_vsync{true, "use_vsync"};
338 BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"}; 347 BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"};
339 Setting<bool> use_assembly_shaders{false, "use_assembly_shaders"}; 348 Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"};
340 Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; 349 Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
341 Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; 350 Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
342 Setting<bool> use_caches_gc{false, "use_caches_gc"}; 351 Setting<bool> use_caches_gc{false, "use_caches_gc"};
diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
index 8272985ff..cd0017726 100644
--- a/src/common/thread_worker.h
+++ b/src/common/thread_worker.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <atomic> 7#include <atomic>
8#include <condition_variable>
8#include <functional> 9#include <functional>
9#include <mutex> 10#include <mutex>
10#include <stop_token> 11#include <stop_token>
@@ -39,7 +40,7 @@ public:
39 const auto lambda = [this, func](std::stop_token stop_token) { 40 const auto lambda = [this, func](std::stop_token stop_token) {
40 Common::SetCurrentThreadName(thread_name.c_str()); 41 Common::SetCurrentThreadName(thread_name.c_str());
41 { 42 {
-            std::conditional_t<with_state, StateType, int> state{func()};
+            [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()};
43 while (!stop_token.stop_requested()) { 44 while (!stop_token.stop_requested()) {
44 Task task; 45 Task task;
45 { 46 {
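
The [[maybe_unused]] attribute added above silences the unused-variable warning in the stateless specialization, where the conditional type collapses to a dummy int that exists only so both variants share one code path. A self-contained illustration of that conditional-state trick; the names are illustrative, not yuzu's:

    #include <iostream>
    #include <string>
    #include <type_traits>

    // When the worker carries no per-thread state, the conditional type collapses to a
    // dummy int kept only so both variants can use the same function body.
    template <bool with_state, typename StateType>
    void RunWorker() {
        const auto make_state = [] {
            if constexpr (with_state) {
                return StateType{};
            } else {
                return 0; // placeholder when there is no state
            }
        };
        [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{make_state()};
        std::cout << "worker running, stateful=" << with_state << '\n';
    }

    int main() {
        RunWorker<true, std::string>();
        RunWorker<false, std::string>();
    }
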
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index c7b899131..5c99c00f5 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -517,6 +517,8 @@ add_library(core STATIC
517 hle/service/psc/psc.h 517 hle/service/psc/psc.h
518 hle/service/ptm/psm.cpp 518 hle/service/ptm/psm.cpp
519 hle/service/ptm/psm.h 519 hle/service/ptm/psm.h
520 hle/service/kernel_helpers.cpp
521 hle/service/kernel_helpers.h
520 hle/service/service.cpp 522 hle/service/service.cpp
521 hle/service/service.h 523 hle/service/service.h
522 hle/service/set/set.cpp 524 hle/service/set/set.cpp
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 28ed6265a..ca68fc325 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -58,6 +58,9 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
58 58
59void SessionRequestHandler::ClientConnected(KServerSession* session) { 59void SessionRequestHandler::ClientConnected(KServerSession* session) {
60 session->ClientConnected(shared_from_this()); 60 session->ClientConnected(shared_from_this());
61
62 // Ensure our server session is tracked globally.
63 kernel.RegisterServerSession(session);
61} 64}
62 65
63void SessionRequestHandler::ClientDisconnected(KServerSession* session) { 66void SessionRequestHandler::ClientDisconnected(KServerSession* session) {
diff --git a/src/core/hle/kernel/k_auto_object.cpp b/src/core/hle/kernel/k_auto_object.cpp
index dbe237f09..c99a9ebb7 100644
--- a/src/core/hle/kernel/k_auto_object.cpp
+++ b/src/core/hle/kernel/k_auto_object.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/k_auto_object.h" 5#include "core/hle/kernel/k_auto_object.h"
6#include "core/hle/kernel/kernel.h"
6 7
7namespace Kernel { 8namespace Kernel {
8 9
@@ -11,4 +12,12 @@ KAutoObject* KAutoObject::Create(KAutoObject* obj) {
11 return obj; 12 return obj;
12} 13}
13 14
15void KAutoObject::RegisterWithKernel() {
16 kernel.RegisterKernelObject(this);
17}
18
19void KAutoObject::UnregisterWithKernel() {
20 kernel.UnregisterKernelObject(this);
21}
22
14} // namespace Kernel 23} // namespace Kernel
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h
index 88a052f65..e4fcdbc67 100644
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -85,8 +85,12 @@ private:
85 KERNEL_AUTOOBJECT_TRAITS(KAutoObject, KAutoObject); 85 KERNEL_AUTOOBJECT_TRAITS(KAutoObject, KAutoObject);
86 86
87public: 87public:
-    explicit KAutoObject(KernelCore& kernel_) : kernel(kernel_) {}
-    virtual ~KAutoObject() = default;
+    explicit KAutoObject(KernelCore& kernel_) : kernel(kernel_) {
+        RegisterWithKernel();
+    }
+    virtual ~KAutoObject() {
+        UnregisterWithKernel();
+    }
90 94
91 static KAutoObject* Create(KAutoObject* ptr); 95 static KAutoObject* Create(KAutoObject* ptr);
92 96
@@ -166,6 +170,10 @@ public:
166 } 170 }
167 } 171 }
168 172
173private:
174 void RegisterWithKernel();
175 void UnregisterWithKernel();
176
169protected: 177protected:
170 KernelCore& kernel; 178 KernelCore& kernel;
171 std::string name; 179 std::string name;
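Taken together, the k_auto_object changes give every kernel object an RAII hook: it registers itself with the kernel in the constructor and unregisters in the destructor, which is what feeds the dangling-object warning added to KernelCore::Impl further down. A standalone sketch of the same tracking idea, using hypothetical ObjectRegistry/TrackedObject names rather than yuzu's actual classes:

    #include <cstdio>
    #include <mutex>
    #include <unordered_set>

    // Stand-in for the registered_objects set kept by KernelCore::Impl.
    class ObjectRegistry {
    public:
        void Register(void* obj) {
            std::scoped_lock lk{mutex};
            objects.insert(obj);
        }
        void Unregister(void* obj) {
            std::scoped_lock lk{mutex};
            objects.erase(obj);
        }
        void ReportLeaks() const {
            std::scoped_lock lk{mutex};
            if (!objects.empty()) {
                std::printf("%zu objects were dangling on shutdown!\n", objects.size());
            }
        }

    private:
        mutable std::mutex mutex;
        std::unordered_set<void*> objects;
    };

    // Stand-in for KAutoObject's constructor/destructor registration hooks.
    class TrackedObject {
    public:
        explicit TrackedObject(ObjectRegistry& registry_) : registry{registry_} {
            registry.Register(this); // mirrors RegisterWithKernel()
        }
        virtual ~TrackedObject() {
            registry.Unregister(this); // mirrors UnregisterWithKernel()
        }

    private:
        ObjectRegistry& registry;
    };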
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index d1bd98051..8ead1a769 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -10,6 +10,7 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/scope_exit.h"
13#include "common/settings.h" 14#include "common/settings.h"
14#include "core/core.h" 15#include "core/core.h"
15#include "core/device_memory.h" 16#include "core/device_memory.h"
@@ -43,6 +44,8 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority
43 ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1)); 44 ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
44 45
45 KThread* thread = KThread::Create(system.Kernel()); 46 KThread* thread = KThread::Create(system.Kernel());
47 SCOPE_EXIT({ thread->Close(); });
48
46 ASSERT(KThread::InitializeUserThread(system, thread, entry_point, 0, stack_top, priority, 49 ASSERT(KThread::InitializeUserThread(system, thread, entry_point, 0, stack_top, priority,
47 owner_process.GetIdealCoreId(), &owner_process) 50 owner_process.GetIdealCoreId(), &owner_process)
48 .IsSuccess()); 51 .IsSuccess());
@@ -162,7 +165,7 @@ void KProcess::DecrementThreadCount() {
162 ASSERT(num_threads > 0); 165 ASSERT(num_threads > 0);
163 166
164 if (const auto count = --num_threads; count == 0) { 167 if (const auto count = --num_threads; count == 0) {
165 UNIMPLEMENTED_MSG("Process termination is not implemented!"); 168 LOG_WARNING(Kernel, "Process termination is not fully implemented.");
166 } 169 }
167} 170}
168 171
@@ -406,6 +409,9 @@ void KProcess::Finalize() {
406 resource_limit->Close(); 409 resource_limit->Close();
407 } 410 }
408 411
412 // Finalize the handle table and close any open handles.
413 handle_table.Finalize();
414
409 // Perform inherited finalization. 415 // Perform inherited finalization.
410 KAutoObjectWithSlabHeapAndContainer<KProcess, KSynchronizationObject>::Finalize(); 416 KAutoObjectWithSlabHeapAndContainer<KProcess, KSynchronizationObject>::Finalize();
411} 417}
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index 5c3c13ce6..b9f24475c 100644
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -28,7 +28,10 @@ namespace Kernel {
28 28
29KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {} 29KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {}
30 30
31KServerSession::~KServerSession() {} 31KServerSession::~KServerSession() {
32 // Ensure that the global list tracking server sessions does not hold on to a reference.
33 kernel.UnregisterServerSession(this);
34}
32 35
33void KServerSession::Initialize(KSession* parent_session_, std::string&& name_, 36void KServerSession::Initialize(KSession* parent_session_, std::string&& name_,
34 std::shared_ptr<SessionRequestManager> manager_) { 37 std::shared_ptr<SessionRequestManager> manager_) {
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 64bd0c494..92fbc5532 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -61,6 +61,7 @@ struct KernelCore::Impl {
61 void Initialize(KernelCore& kernel) { 61 void Initialize(KernelCore& kernel) {
62 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); 62 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
63 global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel); 63 global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
64 global_handle_table->Initialize(KHandleTable::MaxTableSize);
64 65
65 is_phantom_mode_for_singlecore = false; 66 is_phantom_mode_for_singlecore = false;
66 67
@@ -90,9 +91,39 @@ struct KernelCore::Impl {
90 } 91 }
91 92
92 void Shutdown() { 93 void Shutdown() {
94 // Shutdown all processes.
95 if (current_process) {
96 current_process->Finalize();
97 current_process->Close();
98 current_process = nullptr;
99 }
93 process_list.clear(); 100 process_list.clear();
94 101
95 // Ensures all service threads gracefully shutdown 102 // Close all open server ports.
103 std::unordered_set<KServerPort*> server_ports_;
104 {
105 std::lock_guard lk(server_ports_lock);
106 server_ports_ = server_ports;
107 server_ports.clear();
108 }
109 for (auto* server_port : server_ports_) {
110 server_port->Close();
111 }
112 // Close all open server sessions.
113 std::unordered_set<KServerSession*> server_sessions_;
114 {
115 std::lock_guard lk(server_sessions_lock);
116 server_sessions_ = server_sessions;
117 server_sessions.clear();
118 }
119 for (auto* server_session : server_sessions_) {
120 server_session->Close();
121 }
122
123 // Ensure that the object list container is finalized and properly shutdown.
124 object_list_container.Finalize();
125
126 // Ensures all service threads gracefully shutdown.
96 service_threads.clear(); 127 service_threads.clear();
97 128
98 next_object_id = 0; 129 next_object_id = 0;
@@ -111,11 +142,7 @@ struct KernelCore::Impl {
111 142
112 cores.clear(); 143 cores.clear();
113 144
114 if (current_process) { 145 global_handle_table->Finalize();
115 current_process->Close();
116 current_process = nullptr;
117 }
118
119 global_handle_table.reset(); 146 global_handle_table.reset();
120 147
121 preemption_event = nullptr; 148 preemption_event = nullptr;
@@ -142,6 +169,16 @@ struct KernelCore::Impl {
142 169
 143 // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others 170
144 next_host_thread_id = Core::Hardware::NUM_CPU_CORES; 171 next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
172
173 // Track kernel objects that were not freed on shutdown
174 {
175 std::lock_guard lk(registered_objects_lock);
176 if (registered_objects.size()) {
177 LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!",
178 registered_objects.size());
179 registered_objects.clear();
180 }
181 }
145 } 182 }
146 183
147 void InitializePhysicalCores() { 184 void InitializePhysicalCores() {
@@ -630,6 +667,21 @@ struct KernelCore::Impl {
630 user_slab_heap_size); 667 user_slab_heap_size);
631 } 668 }
632 669
670 KClientPort* CreateNamedServicePort(std::string name) {
671 auto search = service_interface_factory.find(name);
672 if (search == service_interface_factory.end()) {
673 UNIMPLEMENTED();
674 return {};
675 }
676
677 KClientPort* port = &search->second(system.ServiceManager(), system);
678 {
679 std::lock_guard lk(server_ports_lock);
680 server_ports.insert(&port->GetParent()->GetServerPort());
681 }
682 return port;
683 }
684
633 std::atomic<u32> next_object_id{0}; 685 std::atomic<u32> next_object_id{0};
634 std::atomic<u64> next_kernel_process_id{KProcess::InitialKIPIDMin}; 686 std::atomic<u64> next_kernel_process_id{KProcess::InitialKIPIDMin};
635 std::atomic<u64> next_user_process_id{KProcess::ProcessIDMin}; 687 std::atomic<u64> next_user_process_id{KProcess::ProcessIDMin};
@@ -656,6 +708,12 @@ struct KernelCore::Impl {
656 /// the ConnectToPort SVC. 708 /// the ConnectToPort SVC.
657 std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory; 709 std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory;
658 NamedPortTable named_ports; 710 NamedPortTable named_ports;
711 std::unordered_set<KServerPort*> server_ports;
712 std::unordered_set<KServerSession*> server_sessions;
713 std::unordered_set<KAutoObject*> registered_objects;
714 std::mutex server_ports_lock;
715 std::mutex server_sessions_lock;
716 std::mutex registered_objects_lock;
659 717
660 std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; 718 std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
661 std::vector<Kernel::PhysicalCore> cores; 719 std::vector<Kernel::PhysicalCore> cores;
@@ -844,12 +902,27 @@ void KernelCore::RegisterNamedService(std::string name, ServiceInterfaceFactory&
844} 902}
845 903
846KClientPort* KernelCore::CreateNamedServicePort(std::string name) { 904KClientPort* KernelCore::CreateNamedServicePort(std::string name) {
847 auto search = impl->service_interface_factory.find(name); 905 return impl->CreateNamedServicePort(std::move(name));
848 if (search == impl->service_interface_factory.end()) { 906}
849 UNIMPLEMENTED(); 907
850 return {}; 908void KernelCore::RegisterServerSession(KServerSession* server_session) {
851 } 909 std::lock_guard lk(impl->server_sessions_lock);
852 return &search->second(impl->system.ServiceManager(), impl->system); 910 impl->server_sessions.insert(server_session);
911}
912
913void KernelCore::UnregisterServerSession(KServerSession* server_session) {
914 std::lock_guard lk(impl->server_sessions_lock);
915 impl->server_sessions.erase(server_session);
916}
917
918void KernelCore::RegisterKernelObject(KAutoObject* object) {
919 std::lock_guard lk(impl->registered_objects_lock);
920 impl->registered_objects.insert(object);
921}
922
923void KernelCore::UnregisterKernelObject(KAutoObject* object) {
924 std::lock_guard lk(impl->registered_objects_lock);
925 impl->registered_objects.erase(object);
853} 926}
854 927
855bool KernelCore::IsValidNamedPort(NamedPortTable::const_iterator port) const { 928bool KernelCore::IsValidNamedPort(NamedPortTable::const_iterator port) const {
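One detail worth noting in the Shutdown() hunk above: the server port and server session sets are copied (and the originals cleared) while the corresponding mutex is held, and Close() is only called on the copies afterwards. That ordering appears deliberate, because the KServerSession destructor introduced earlier in this commit calls back into UnregisterServerSession, which takes the same non-recursive mutex; closing while still holding the lock would risk a self-deadlock. A reduced sketch of the copy-then-close pattern, with hypothetical names:

    #include <mutex>
    #include <unordered_set>

    struct Session;
    void Close(Session* session); // assume closing can re-enter Unregister() below

    std::mutex sessions_lock;
    std::unordered_set<Session*> sessions;

    void Unregister(Session* session) {
        std::lock_guard lk(sessions_lock);
        sessions.erase(session);
    }

    void CloseAll() {
        // Take the whole set out from under the lock...
        std::unordered_set<Session*> local;
        {
            std::lock_guard lk(sessions_lock);
            local.swap(sessions);
        }
        // ...then close outside the lock, so re-entrant Unregister() calls cannot deadlock.
        for (auto* session : local) {
            Close(session);
        }
    }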
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 2d01e1ae0..3a6db0b1c 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -45,6 +45,7 @@ class KPort;
45class KProcess; 45class KProcess;
46class KResourceLimit; 46class KResourceLimit;
47class KScheduler; 47class KScheduler;
48class KServerSession;
48class KSession; 49class KSession;
49class KSharedMemory; 50class KSharedMemory;
50class KThread; 51class KThread;
@@ -185,6 +186,22 @@ public:
185 /// Opens a port to a service previously registered with RegisterNamedService. 186 /// Opens a port to a service previously registered with RegisterNamedService.
186 KClientPort* CreateNamedServicePort(std::string name); 187 KClientPort* CreateNamedServicePort(std::string name);
187 188
 189 /// Registers a server session with the global emulation state, to be freed on shutdown. This is
190 /// necessary because we do not emulate processes for HLE sessions.
191 void RegisterServerSession(KServerSession* server_session);
192
193 /// Unregisters a server session previously registered with RegisterServerSession when it was
 194 /// destroyed during the current emulation session.
195 void UnregisterServerSession(KServerSession* server_session);
196
 197 /// Registers all kernel objects with the global emulation state; this is purely for tracking
 198 /// leaks after emulation has been shut down.
199 void RegisterKernelObject(KAutoObject* object);
200
201 /// Unregisters a kernel object previously registered with RegisterKernelObject when it was
 202 /// destroyed during the current emulation session.
203 void UnregisterKernelObject(KAutoObject* object);
204
188 /// Determines whether or not the given port is a valid named port. 205 /// Determines whether or not the given port is a valid named port.
189 bool IsValidNamedPort(NamedPortTable::const_iterator port) const; 206 bool IsValidNamedPort(NamedPortTable::const_iterator port) const;
190 207
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 8339e11a0..2eb532472 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -298,6 +298,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out, VAddr po
298 // Create a session. 298 // Create a session.
299 KClientSession* session{}; 299 KClientSession* session{};
300 R_TRY(port->CreateSession(std::addressof(session))); 300 R_TRY(port->CreateSession(std::addressof(session)));
301 port->Close();
301 302
302 // Register the session in the table, close the extra reference. 303 // Register the session in the table, close the extra reference.
303 handle_table.Register(*out, session); 304 handle_table.Register(*out, session);
@@ -1439,11 +1440,6 @@ static void ExitProcess(Core::System& system) {
1439 LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID()); 1440 LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
1440 ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, 1441 ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
1441 "Process has already exited"); 1442 "Process has already exited");
1442
1443 current_process->PrepareForTermination();
1444
1445 // Kill the current thread
1446 system.Kernel().CurrentScheduler()->GetCurrentThread()->Exit();
1447} 1443}
1448 1444
1449static void ExitProcess32(Core::System& system) { 1445static void ExitProcess32(Core::System& system) {
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 6ce1360e3..b7f551e40 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -18,6 +18,7 @@
18#include "core/hle/kernel/k_writable_event.h" 18#include "core/hle/kernel/k_writable_event.h"
19#include "core/hle/kernel/kernel.h" 19#include "core/hle/kernel/kernel.h"
20#include "core/hle/service/hid/controllers/npad.h" 20#include "core/hle/service/hid/controllers/npad.h"
21#include "core/hle/service/kernel_helpers.h"
21 22
22namespace Service::HID { 23namespace Service::HID {
23constexpr s32 HID_JOYSTICK_MAX = 0x7fff; 24constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
@@ -147,7 +148,9 @@ bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) {
147 device_handle.device_index < DeviceIndex::MaxDeviceIndex; 148 device_handle.device_index < DeviceIndex::MaxDeviceIndex;
148} 149}
149 150
150Controller_NPad::Controller_NPad(Core::System& system_) : ControllerBase{system_} { 151Controller_NPad::Controller_NPad(Core::System& system_,
152 KernelHelpers::ServiceContext& service_context_)
153 : ControllerBase{system_}, service_context{service_context_} {
151 latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE}); 154 latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE});
152} 155}
153 156
@@ -251,10 +254,9 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
251} 254}
252 255
253void Controller_NPad::OnInit() { 256void Controller_NPad::OnInit() {
254 auto& kernel = system.Kernel();
255 for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { 257 for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) {
256 styleset_changed_events[i] = Kernel::KEvent::Create(kernel); 258 styleset_changed_events[i] =
257 styleset_changed_events[i]->Initialize(fmt::format("npad:NpadStyleSetChanged_{}", i)); 259 service_context.CreateEvent(fmt::format("npad:NpadStyleSetChanged_{}", i));
258 } 260 }
259 261
260 if (!IsControllerActivated()) { 262 if (!IsControllerActivated()) {
@@ -344,8 +346,7 @@ void Controller_NPad::OnRelease() {
344 } 346 }
345 347
346 for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { 348 for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) {
347 styleset_changed_events[i]->Close(); 349 service_context.CloseEvent(styleset_changed_events[i]);
348 styleset_changed_events[i] = nullptr;
349 } 350 }
350} 351}
351 352
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 1409d82a2..4fcc6f93a 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -20,6 +20,10 @@ class KEvent;
20class KReadableEvent; 20class KReadableEvent;
21} // namespace Kernel 21} // namespace Kernel
22 22
23namespace Service::KernelHelpers {
24class ServiceContext;
25}
26
23namespace Service::HID { 27namespace Service::HID {
24 28
25constexpr u32 NPAD_HANDHELD = 32; 29constexpr u32 NPAD_HANDHELD = 32;
@@ -27,7 +31,8 @@ constexpr u32 NPAD_UNKNOWN = 16; // TODO(ogniK): What is this?
27 31
28class Controller_NPad final : public ControllerBase { 32class Controller_NPad final : public ControllerBase {
29public: 33public:
30 explicit Controller_NPad(Core::System& system_); 34 explicit Controller_NPad(Core::System& system_,
35 KernelHelpers::ServiceContext& service_context_);
31 ~Controller_NPad() override; 36 ~Controller_NPad() override;
32 37
33 // Called when the controller is initialized 38 // Called when the controller is initialized
@@ -566,6 +571,7 @@ private:
566 std::array<std::unique_ptr<Input::MotionDevice>, Settings::NativeMotion::NUM_MOTIONS_HID>, 571 std::array<std::unique_ptr<Input::MotionDevice>, Settings::NativeMotion::NUM_MOTIONS_HID>,
567 10>; 572 10>;
568 573
574 KernelHelpers::ServiceContext& service_context;
569 std::mutex mutex; 575 std::mutex mutex;
570 ButtonArray buttons; 576 ButtonArray buttons;
571 StickArray sticks; 577 StickArray sticks;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index d68b023d0..b8b80570d 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -46,8 +46,9 @@ constexpr auto pad_update_ns = std::chrono::nanoseconds{1000 * 1000}; //
46constexpr auto motion_update_ns = std::chrono::nanoseconds{15 * 1000 * 1000}; // (15ms, 66.666Hz) 46constexpr auto motion_update_ns = std::chrono::nanoseconds{15 * 1000 * 1000}; // (15ms, 66.666Hz)
47constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; 47constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
48 48
49IAppletResource::IAppletResource(Core::System& system_) 49IAppletResource::IAppletResource(Core::System& system_,
50 : ServiceFramework{system_, "IAppletResource"} { 50 KernelHelpers::ServiceContext& service_context_)
51 : ServiceFramework{system_, "IAppletResource"}, service_context{service_context_} {
51 static const FunctionInfo functions[] = { 52 static const FunctionInfo functions[] = {
52 {0, &IAppletResource::GetSharedMemoryHandle, "GetSharedMemoryHandle"}, 53 {0, &IAppletResource::GetSharedMemoryHandle, "GetSharedMemoryHandle"},
53 }; 54 };
@@ -63,7 +64,7 @@ IAppletResource::IAppletResource(Core::System& system_)
63 MakeController<Controller_Stubbed>(HidController::CaptureButton); 64 MakeController<Controller_Stubbed>(HidController::CaptureButton);
64 MakeController<Controller_Stubbed>(HidController::InputDetector); 65 MakeController<Controller_Stubbed>(HidController::InputDetector);
65 MakeController<Controller_Stubbed>(HidController::UniquePad); 66 MakeController<Controller_Stubbed>(HidController::UniquePad);
66 MakeController<Controller_NPad>(HidController::NPad); 67 MakeControllerWithServiceContext<Controller_NPad>(HidController::NPad);
67 MakeController<Controller_Gesture>(HidController::Gesture); 68 MakeController<Controller_Gesture>(HidController::Gesture);
68 MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor); 69 MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor);
69 70
@@ -191,13 +192,14 @@ private:
191 192
192std::shared_ptr<IAppletResource> Hid::GetAppletResource() { 193std::shared_ptr<IAppletResource> Hid::GetAppletResource() {
193 if (applet_resource == nullptr) { 194 if (applet_resource == nullptr) {
194 applet_resource = std::make_shared<IAppletResource>(system); 195 applet_resource = std::make_shared<IAppletResource>(system, service_context);
195 } 196 }
196 197
197 return applet_resource; 198 return applet_resource;
198} 199}
199 200
200Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} { 201Hid::Hid(Core::System& system_)
202 : ServiceFramework{system_, "hid"}, service_context{system_, service_name} {
201 // clang-format off 203 // clang-format off
202 static const FunctionInfo functions[] = { 204 static const FunctionInfo functions[] = {
203 {0, &Hid::CreateAppletResource, "CreateAppletResource"}, 205 {0, &Hid::CreateAppletResource, "CreateAppletResource"},
@@ -347,7 +349,7 @@ void Hid::CreateAppletResource(Kernel::HLERequestContext& ctx) {
347 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); 349 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
348 350
349 if (applet_resource == nullptr) { 351 if (applet_resource == nullptr) {
350 applet_resource = std::make_shared<IAppletResource>(system); 352 applet_resource = std::make_shared<IAppletResource>(system, service_context);
351 } 353 }
352 354
353 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 355 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 83fc2ea1d..9c5c7f252 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -7,6 +7,7 @@
7#include <chrono> 7#include <chrono>
8 8
9#include "core/hle/service/hid/controllers/controller_base.h" 9#include "core/hle/service/hid/controllers/controller_base.h"
10#include "core/hle/service/kernel_helpers.h"
10#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
11 12
12namespace Core::Timing { 13namespace Core::Timing {
@@ -39,7 +40,8 @@ enum class HidController : std::size_t {
39 40
40class IAppletResource final : public ServiceFramework<IAppletResource> { 41class IAppletResource final : public ServiceFramework<IAppletResource> {
41public: 42public:
42 explicit IAppletResource(Core::System& system_); 43 explicit IAppletResource(Core::System& system_,
44 KernelHelpers::ServiceContext& service_context_);
43 ~IAppletResource() override; 45 ~IAppletResource() override;
44 46
45 void ActivateController(HidController controller); 47 void ActivateController(HidController controller);
@@ -60,11 +62,18 @@ private:
60 void MakeController(HidController controller) { 62 void MakeController(HidController controller) {
61 controllers[static_cast<std::size_t>(controller)] = std::make_unique<T>(system); 63 controllers[static_cast<std::size_t>(controller)] = std::make_unique<T>(system);
62 } 64 }
65 template <typename T>
66 void MakeControllerWithServiceContext(HidController controller) {
67 controllers[static_cast<std::size_t>(controller)] =
68 std::make_unique<T>(system, service_context);
69 }
63 70
64 void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx); 71 void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
65 void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); 72 void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late);
66 void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); 73 void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late);
67 74
75 KernelHelpers::ServiceContext& service_context;
76
68 std::shared_ptr<Core::Timing::EventType> pad_update_event; 77 std::shared_ptr<Core::Timing::EventType> pad_update_event;
69 std::shared_ptr<Core::Timing::EventType> motion_update_event; 78 std::shared_ptr<Core::Timing::EventType> motion_update_event;
70 79
@@ -176,6 +185,8 @@ private:
176 static_assert(sizeof(VibrationDeviceInfo) == 0x8, "VibrationDeviceInfo has incorrect size."); 185 static_assert(sizeof(VibrationDeviceInfo) == 0x8, "VibrationDeviceInfo has incorrect size.");
177 186
178 std::shared_ptr<IAppletResource> applet_resource; 187 std::shared_ptr<IAppletResource> applet_resource;
188
189 KernelHelpers::ServiceContext service_context;
179}; 190};
180 191
181/// Reload input devices. Used when input configuration changed 192/// Reload input devices. Used when input configuration changed
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
new file mode 100644
index 000000000..62f4cdfb2
--- /dev/null
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/hle/kernel/k_event.h"
7#include "core/hle/kernel/k_process.h"
8#include "core/hle/kernel/k_readable_event.h"
9#include "core/hle/kernel/k_resource_limit.h"
10#include "core/hle/kernel/k_scoped_resource_reservation.h"
11#include "core/hle/kernel/k_writable_event.h"
12#include "core/hle/service/kernel_helpers.h"
13
14namespace Service::KernelHelpers {
15
16ServiceContext::ServiceContext(Core::System& system_, std::string name_)
17 : kernel(system_.Kernel()) {
18 process = Kernel::KProcess::Create(kernel);
19 ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_),
20 Kernel::KProcess::ProcessType::Userland)
21 .IsSuccess());
22}
23
24ServiceContext::~ServiceContext() {
25 process->Close();
26 process = nullptr;
27}
28
29Kernel::KEvent* ServiceContext::CreateEvent(std::string&& name) {
30 // Reserve a new event from the process resource limit
31 Kernel::KScopedResourceReservation event_reservation(process,
32 Kernel::LimitableResource::Events);
33 if (!event_reservation.Succeeded()) {
34 LOG_CRITICAL(Service, "Resource limit reached!");
35 return {};
36 }
37
38 // Create a new event.
39 auto* event = Kernel::KEvent::Create(kernel);
40 if (!event) {
41 LOG_CRITICAL(Service, "Unable to create event!");
42 return {};
43 }
44
45 // Initialize the event.
46 event->Initialize(std::move(name));
47
 48 // Commit the event reservation.
49 event_reservation.Commit();
50
51 // Register the event.
52 Kernel::KEvent::Register(kernel, event);
53
54 return event;
55}
56
57void ServiceContext::CloseEvent(Kernel::KEvent* event) {
58 event->GetReadableEvent().Close();
59 event->GetWritableEvent().Close();
60}
61
62} // namespace Service::KernelHelpers
diff --git a/src/core/hle/service/kernel_helpers.h b/src/core/hle/service/kernel_helpers.h
new file mode 100644
index 000000000..4f3e95f67
--- /dev/null
+++ b/src/core/hle/service/kernel_helpers.h
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9namespace Core {
10class System;
11}
12
13namespace Kernel {
14class KernelCore;
15class KEvent;
16class KProcess;
17} // namespace Kernel
18
19namespace Service::KernelHelpers {
20
21class ServiceContext {
22public:
23 ServiceContext(Core::System& system_, std::string name_);
24 ~ServiceContext();
25
26 Kernel::KEvent* CreateEvent(std::string&& name);
27
28 void CloseEvent(Kernel::KEvent* event);
29
30private:
31 Kernel::KernelCore& kernel;
32 Kernel::KProcess* process{};
33};
34
35} // namespace Service::KernelHelpers
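The consumer side of this helper appears in the hid and nvdrv diffs below: a service owns a ServiceContext member and routes event creation and teardown through it instead of calling KEvent::Create and Initialize directly. A condensed, hypothetical usage sketch (ExampleService is not a real yuzu service; error handling omitted):

    #include "core/hle/kernel/k_event.h"
    #include "core/hle/service/kernel_helpers.h"
    #include "core/hle/service/service.h"

    namespace Service {

    class ExampleService final : public ServiceFramework<ExampleService> {
    public:
        explicit ExampleService(Core::System& system_)
            // service_name is the identifier this commit moves into ServiceFrameworkBase.
            : ServiceFramework{system_, "example"}, service_context{system_, service_name} {
            // Replaces the previous KEvent::Create + Initialize pair.
            update_event = service_context.CreateEvent("example:UpdateEvent");
        }

        ~ExampleService() override {
            // CloseEvent closes both the readable and writable halves of the event.
            service_context.CloseEvent(update_event);
        }

    private:
        KernelHelpers::ServiceContext service_context;
        Kernel::KEvent* update_event{};
    };

    } // namespace Service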
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 03992af5e..ff405099a 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -39,11 +39,11 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
39 nvflinger.SetNVDrvInstance(module_); 39 nvflinger.SetNVDrvInstance(module_);
40} 40}
41 41
42Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { 42Module::Module(Core::System& system)
43 auto& kernel = system.Kernel(); 43 : syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} {
44 for (u32 i = 0; i < MaxNvEvents; i++) { 44 for (u32 i = 0; i < MaxNvEvents; i++) {
45 events_interface.events[i].event = Kernel::KEvent::Create(kernel); 45 events_interface.events[i].event =
46 events_interface.events[i].event->Initialize(fmt::format("NVDRV::NvEvent_{}", i)); 46 service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i));
47 events_interface.status[i] = EventState::Free; 47 events_interface.status[i] = EventState::Free;
48 events_interface.registered[i] = false; 48 events_interface.registered[i] = false;
49 } 49 }
@@ -65,8 +65,7 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
65 65
66Module::~Module() { 66Module::~Module() {
67 for (u32 i = 0; i < MaxNvEvents; i++) { 67 for (u32 i = 0; i < MaxNvEvents; i++) {
68 events_interface.events[i].event->Close(); 68 service_context.CloseEvent(events_interface.events[i].event);
69 events_interface.events[i].event = nullptr;
70 } 69 }
71} 70}
72 71
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index a43ceb7ae..e2a1dde5b 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -9,6 +9,7 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/service/kernel_helpers.h"
12#include "core/hle/service/nvdrv/nvdata.h" 13#include "core/hle/service/nvdrv/nvdata.h"
13#include "core/hle/service/nvdrv/syncpoint_manager.h" 14#include "core/hle/service/nvdrv/syncpoint_manager.h"
14#include "core/hle/service/service.h" 15#include "core/hle/service/service.h"
@@ -154,6 +155,8 @@ private:
154 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; 155 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
155 156
156 EventInterface events_interface; 157 EventInterface events_interface;
158
159 KernelHelpers::ServiceContext service_context;
157}; 160};
158 161
159/// Registers all NVDRV services with the specified service manager. 162/// Registers all NVDRV services with the specified service manager.
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index e6fba88b2..b3e50433b 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -104,23 +104,22 @@ ServiceFrameworkBase::~ServiceFrameworkBase() {
104void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { 104void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) {
105 const auto guard = LockService(); 105 const auto guard = LockService();
106 106
107 ASSERT(!port_installed); 107 ASSERT(!service_registered);
108 108
109 auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap(); 109 service_manager.RegisterService(service_name, max_sessions, shared_from_this());
110 port->SetSessionHandler(shared_from_this()); 110 service_registered = true;
111 port_installed = true;
112} 111}
113 112
114Kernel::KClientPort& ServiceFrameworkBase::CreatePort() { 113Kernel::KClientPort& ServiceFrameworkBase::CreatePort() {
115 const auto guard = LockService(); 114 const auto guard = LockService();
116 115
117 ASSERT(!port_installed); 116 ASSERT(!service_registered);
118 117
119 auto* port = Kernel::KPort::Create(kernel); 118 auto* port = Kernel::KPort::Create(kernel);
120 port->Initialize(max_sessions, false, service_name); 119 port->Initialize(max_sessions, false, service_name);
121 port->GetServerPort().SetSessionHandler(shared_from_this()); 120 port->GetServerPort().SetSessionHandler(shared_from_this());
122 121
123 port_installed = true; 122 service_registered = true;
124 123
125 return port->GetClientPort(); 124 return port->GetClientPort();
126} 125}
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index e078ac176..c9d6b879d 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -96,6 +96,9 @@ protected:
96 /// System context that the service operates under. 96 /// System context that the service operates under.
97 Core::System& system; 97 Core::System& system;
98 98
99 /// Identifier string used to connect to the service.
100 std::string service_name;
101
99private: 102private:
100 template <typename T> 103 template <typename T>
101 friend class ServiceFramework; 104 friend class ServiceFramework;
@@ -117,14 +120,12 @@ private:
117 void RegisterHandlersBaseTipc(const FunctionInfoBase* functions, std::size_t n); 120 void RegisterHandlersBaseTipc(const FunctionInfoBase* functions, std::size_t n);
118 void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info); 121 void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
119 122
120 /// Identifier string used to connect to the service.
121 std::string service_name;
122 /// Maximum number of concurrent sessions that this service can handle. 123 /// Maximum number of concurrent sessions that this service can handle.
123 u32 max_sessions; 124 u32 max_sessions;
124 125
 125 /// Flag to store if a port was already created/installed to detect multiple install attempts, 126
126 /// which is not supported. 127 /// which is not supported.
127 bool port_installed = false; 128 bool service_registered = false;
128 129
129 /// Function used to safely up-cast pointers to the derived class before invoking a handler. 130 /// Function used to safely up-cast pointers to the derived class before invoking a handler.
130 InvokerFn* handler_invoker; 131 InvokerFn* handler_invoker;
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 15034abed..ae4dc4a75 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -4,6 +4,7 @@
4 4
5#include <tuple> 5#include <tuple>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/scope_exit.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/hle/ipc_helpers.h" 9#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/k_client_port.h" 10#include "core/hle/kernel/k_client_port.h"
@@ -40,17 +41,13 @@ static ResultCode ValidateServiceName(const std::string& name) {
40} 41}
41 42
42Kernel::KClientPort& ServiceManager::InterfaceFactory(ServiceManager& self, Core::System& system) { 43Kernel::KClientPort& ServiceManager::InterfaceFactory(ServiceManager& self, Core::System& system) {
43 ASSERT(self.sm_interface.expired()); 44 self.sm_interface = std::make_shared<SM>(self, system);
44
45 auto sm = std::make_shared<SM>(self, system);
46 self.sm_interface = sm;
47 self.controller_interface = std::make_unique<Controller>(system); 45 self.controller_interface = std::make_unique<Controller>(system);
48 46 return self.sm_interface->CreatePort();
49 return sm->CreatePort();
50} 47}
51 48
52ResultVal<Kernel::KServerPort*> ServiceManager::RegisterService(std::string name, 49ResultCode ServiceManager::RegisterService(std::string name, u32 max_sessions,
53 u32 max_sessions) { 50 Kernel::SessionRequestHandlerPtr handler) {
54 51
55 CASCADE_CODE(ValidateServiceName(name)); 52 CASCADE_CODE(ValidateServiceName(name));
56 53
@@ -59,12 +56,9 @@ ResultVal<Kernel::KServerPort*> ServiceManager::RegisterService(std::string name
59 return ERR_ALREADY_REGISTERED; 56 return ERR_ALREADY_REGISTERED;
60 } 57 }
61 58
62 auto* port = Kernel::KPort::Create(kernel); 59 registered_services.emplace(std::move(name), handler);
63 port->Initialize(max_sessions, false, name);
64 60
65 registered_services.emplace(std::move(name), port); 61 return ResultSuccess;
66
67 return MakeResult(&port->GetServerPort());
68} 62}
69 63
70ResultCode ServiceManager::UnregisterService(const std::string& name) { 64ResultCode ServiceManager::UnregisterService(const std::string& name) {
@@ -76,14 +70,11 @@ ResultCode ServiceManager::UnregisterService(const std::string& name) {
76 return ERR_SERVICE_NOT_REGISTERED; 70 return ERR_SERVICE_NOT_REGISTERED;
77 } 71 }
78 72
79 iter->second->Close();
80
81 registered_services.erase(iter); 73 registered_services.erase(iter);
82 return ResultSuccess; 74 return ResultSuccess;
83} 75}
84 76
85ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name) { 77ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name) {
86
87 CASCADE_CODE(ValidateServiceName(name)); 78 CASCADE_CODE(ValidateServiceName(name));
88 auto it = registered_services.find(name); 79 auto it = registered_services.find(name);
89 if (it == registered_services.end()) { 80 if (it == registered_services.end()) {
@@ -91,10 +82,13 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name
91 return ERR_SERVICE_NOT_REGISTERED; 82 return ERR_SERVICE_NOT_REGISTERED;
92 } 83 }
93 84
94 return MakeResult(it->second); 85 auto* port = Kernel::KPort::Create(kernel);
95} 86 port->Initialize(ServerSessionCountMax, false, name);
87 auto handler = it->second;
88 port->GetServerPort().SetSessionHandler(std::move(handler));
96 89
97SM::~SM() = default; 90 return MakeResult(port);
91}
98 92
99/** 93/**
100 * SM::Initialize service function 94 * SM::Initialize service function
@@ -156,11 +150,15 @@ ResultVal<Kernel::KClientSession*> SM::GetServiceImpl(Kernel::HLERequestContext&
156 LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, port_result.Code().raw); 150 LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, port_result.Code().raw);
157 return port_result.Code(); 151 return port_result.Code();
158 } 152 }
159 auto& port = port_result.Unwrap()->GetClientPort(); 153 auto& port = port_result.Unwrap();
154 SCOPE_EXIT({ port->GetClientPort().Close(); });
155
156 server_ports.emplace_back(&port->GetServerPort());
160 157
161 // Create a new session. 158 // Create a new session.
162 Kernel::KClientSession* session{}; 159 Kernel::KClientSession* session{};
163 if (const auto result = port.CreateSession(std::addressof(session)); result.IsError()) { 160 if (const auto result = port->GetClientPort().CreateSession(std::addressof(session));
161 result.IsError()) {
164 LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw); 162 LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw);
165 return result; 163 return result;
166 } 164 }
@@ -180,20 +178,21 @@ void SM::RegisterService(Kernel::HLERequestContext& ctx) {
180 LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name, 178 LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name,
181 max_session_count, is_light); 179 max_session_count, is_light);
182 180
183 auto handle = service_manager.RegisterService(name, max_session_count); 181 if (const auto result = service_manager.RegisterService(name, max_session_count, nullptr);
184 if (handle.Failed()) { 182 result.IsError()) {
185 LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", 183 LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", result.raw);
186 handle.Code().raw);
187 IPC::ResponseBuilder rb{ctx, 2}; 184 IPC::ResponseBuilder rb{ctx, 2};
188 rb.Push(handle.Code()); 185 rb.Push(result);
189 return; 186 return;
190 } 187 }
191 188
192 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; 189 auto* port = Kernel::KPort::Create(kernel);
193 rb.Push(handle.Code()); 190 port->Initialize(ServerSessionCountMax, is_light, name);
191 SCOPE_EXIT({ port->GetClientPort().Close(); });
194 192
195 auto server_port = handle.Unwrap(); 193 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
196 rb.PushMoveObjects(server_port); 194 rb.Push(ResultSuccess);
195 rb.PushMoveObjects(port->GetServerPort());
197} 196}
198 197
199void SM::UnregisterService(Kernel::HLERequestContext& ctx) { 198void SM::UnregisterService(Kernel::HLERequestContext& ctx) {
@@ -225,4 +224,10 @@ SM::SM(ServiceManager& service_manager_, Core::System& system_)
225 }); 224 });
226} 225}
227 226
227SM::~SM() {
228 for (auto& server_port : server_ports) {
229 server_port->Close();
230 }
231}
232
228} // namespace Service::SM 233} // namespace Service::SM
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index ea37f11d4..068c78588 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -49,6 +49,7 @@ private:
49 ServiceManager& service_manager; 49 ServiceManager& service_manager;
50 bool is_initialized{}; 50 bool is_initialized{};
51 Kernel::KernelCore& kernel; 51 Kernel::KernelCore& kernel;
52 std::vector<Kernel::KServerPort*> server_ports;
52}; 53};
53 54
54class ServiceManager { 55class ServiceManager {
@@ -58,7 +59,8 @@ public:
58 explicit ServiceManager(Kernel::KernelCore& kernel_); 59 explicit ServiceManager(Kernel::KernelCore& kernel_);
59 ~ServiceManager(); 60 ~ServiceManager();
60 61
61 ResultVal<Kernel::KServerPort*> RegisterService(std::string name, u32 max_sessions); 62 ResultCode RegisterService(std::string name, u32 max_sessions,
63 Kernel::SessionRequestHandlerPtr handler);
62 ResultCode UnregisterService(const std::string& name); 64 ResultCode UnregisterService(const std::string& name);
63 ResultVal<Kernel::KPort*> GetServicePort(const std::string& name); 65 ResultVal<Kernel::KPort*> GetServicePort(const std::string& name);
64 66
@@ -69,21 +71,17 @@ public:
69 LOG_DEBUG(Service, "Can't find service: {}", service_name); 71 LOG_DEBUG(Service, "Can't find service: {}", service_name);
70 return nullptr; 72 return nullptr;
71 } 73 }
72 auto* port = service->second; 74 return std::static_pointer_cast<T>(service->second);
73 if (port == nullptr) {
74 return nullptr;
75 }
76 return std::static_pointer_cast<T>(port->GetServerPort().GetSessionRequestHandler());
77 } 75 }
78 76
79 void InvokeControlRequest(Kernel::HLERequestContext& context); 77 void InvokeControlRequest(Kernel::HLERequestContext& context);
80 78
81private: 79private:
82 std::weak_ptr<SM> sm_interface; 80 std::shared_ptr<SM> sm_interface;
83 std::unique_ptr<Controller> controller_interface; 81 std::unique_ptr<Controller> controller_interface;
84 82
85 /// Map of registered services, retrieved using GetServicePort. 83 /// Map of registered services, retrieved using GetServicePort.
86 std::unordered_map<std::string, Kernel::KPort*> registered_services; 84 std::unordered_map<std::string, Kernel::SessionRequestHandlerPtr> registered_services;
87 85
88 /// Kernel context 86 /// Kernel context
89 Kernel::KernelCore& kernel; 87 Kernel::KernelCore& kernel;
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index cfaf50105..365b8f906 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -62,7 +62,6 @@ json GetYuzuVersionData() {
62 {"build_date", std::string(Common::g_build_date)}, 62 {"build_date", std::string(Common::g_build_date)},
63 {"build_fullname", std::string(Common::g_build_fullname)}, 63 {"build_fullname", std::string(Common::g_build_fullname)},
64 {"build_version", std::string(Common::g_build_version)}, 64 {"build_version", std::string(Common::g_build_version)},
65 {"shader_cache_version", std::string(Common::g_shader_cache_version)},
66 }; 65 };
67} 66}
68 67
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 066cb23e4..422de3a7d 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
233 Settings::values.use_nvdec_emulation.GetValue()); 233 Settings::values.use_nvdec_emulation.GetValue());
234 AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); 234 AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
235 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); 235 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
236 AddField(field_type, "Renderer_UseAssemblyShaders", 236 AddField(field_type, "Renderer_ShaderBackend",
237 Settings::values.use_assembly_shaders.GetValue()); 237 static_cast<u32>(Settings::values.shader_backend.GetValue()));
238 AddField(field_type, "Renderer_UseAsynchronousShaders", 238 AddField(field_type, "Renderer_UseAsynchronousShaders",
239 Settings::values.use_asynchronous_shaders.GetValue()); 239 Settings::values.use_asynchronous_shaders.GetValue());
240 AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); 240 AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue());
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
new file mode 100644
index 000000000..b5b7e5e83
--- /dev/null
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -0,0 +1,268 @@
1add_library(shader_recompiler STATIC
2 backend/bindings.h
3 backend/glasm/emit_context.cpp
4 backend/glasm/emit_context.h
5 backend/glasm/emit_glasm.cpp
6 backend/glasm/emit_glasm.h
7 backend/glasm/emit_glasm_barriers.cpp
8 backend/glasm/emit_glasm_bitwise_conversion.cpp
9 backend/glasm/emit_glasm_composite.cpp
10 backend/glasm/emit_glasm_context_get_set.cpp
11 backend/glasm/emit_glasm_control_flow.cpp
12 backend/glasm/emit_glasm_convert.cpp
13 backend/glasm/emit_glasm_floating_point.cpp
14 backend/glasm/emit_glasm_image.cpp
15 backend/glasm/emit_glasm_instructions.h
16 backend/glasm/emit_glasm_integer.cpp
17 backend/glasm/emit_glasm_logical.cpp
18 backend/glasm/emit_glasm_memory.cpp
19 backend/glasm/emit_glasm_not_implemented.cpp
20 backend/glasm/emit_glasm_select.cpp
21 backend/glasm/emit_glasm_shared_memory.cpp
22 backend/glasm/emit_glasm_special.cpp
23 backend/glasm/emit_glasm_undefined.cpp
24 backend/glasm/emit_glasm_warp.cpp
25 backend/glasm/reg_alloc.cpp
26 backend/glasm/reg_alloc.h
27 backend/glsl/emit_context.cpp
28 backend/glsl/emit_context.h
29 backend/glsl/emit_glsl.cpp
30 backend/glsl/emit_glsl.h
31 backend/glsl/emit_glsl_atomic.cpp
32 backend/glsl/emit_glsl_barriers.cpp
33 backend/glsl/emit_glsl_bitwise_conversion.cpp
34 backend/glsl/emit_glsl_composite.cpp
35 backend/glsl/emit_glsl_context_get_set.cpp
36 backend/glsl/emit_glsl_control_flow.cpp
37 backend/glsl/emit_glsl_convert.cpp
38 backend/glsl/emit_glsl_floating_point.cpp
39 backend/glsl/emit_glsl_image.cpp
40 backend/glsl/emit_glsl_instructions.h
41 backend/glsl/emit_glsl_integer.cpp
42 backend/glsl/emit_glsl_logical.cpp
43 backend/glsl/emit_glsl_memory.cpp
44 backend/glsl/emit_glsl_not_implemented.cpp
45 backend/glsl/emit_glsl_select.cpp
46 backend/glsl/emit_glsl_shared_memory.cpp
47 backend/glsl/emit_glsl_special.cpp
48 backend/glsl/emit_glsl_undefined.cpp
49 backend/glsl/emit_glsl_warp.cpp
50 backend/glsl/var_alloc.cpp
51 backend/glsl/var_alloc.h
52 backend/spirv/emit_context.cpp
53 backend/spirv/emit_context.h
54 backend/spirv/emit_spirv.cpp
55 backend/spirv/emit_spirv.h
56 backend/spirv/emit_spirv_atomic.cpp
57 backend/spirv/emit_spirv_barriers.cpp
58 backend/spirv/emit_spirv_bitwise_conversion.cpp
59 backend/spirv/emit_spirv_composite.cpp
60 backend/spirv/emit_spirv_context_get_set.cpp
61 backend/spirv/emit_spirv_control_flow.cpp
62 backend/spirv/emit_spirv_convert.cpp
63 backend/spirv/emit_spirv_floating_point.cpp
64 backend/spirv/emit_spirv_image.cpp
65 backend/spirv/emit_spirv_image_atomic.cpp
66 backend/spirv/emit_spirv_instructions.h
67 backend/spirv/emit_spirv_integer.cpp
68 backend/spirv/emit_spirv_logical.cpp
69 backend/spirv/emit_spirv_memory.cpp
70 backend/spirv/emit_spirv_select.cpp
71 backend/spirv/emit_spirv_shared_memory.cpp
72 backend/spirv/emit_spirv_special.cpp
73 backend/spirv/emit_spirv_undefined.cpp
74 backend/spirv/emit_spirv_warp.cpp
75 environment.h
76 exception.h
77 frontend/ir/abstract_syntax_list.h
78 frontend/ir/attribute.cpp
79 frontend/ir/attribute.h
80 frontend/ir/basic_block.cpp
81 frontend/ir/basic_block.h
82 frontend/ir/breadth_first_search.h
83 frontend/ir/condition.cpp
84 frontend/ir/condition.h
85 frontend/ir/flow_test.cpp
86 frontend/ir/flow_test.h
87 frontend/ir/ir_emitter.cpp
88 frontend/ir/ir_emitter.h
89 frontend/ir/microinstruction.cpp
90 frontend/ir/modifiers.h
91 frontend/ir/opcodes.cpp
92 frontend/ir/opcodes.h
93 frontend/ir/opcodes.inc
94 frontend/ir/patch.cpp
95 frontend/ir/patch.h
96 frontend/ir/post_order.cpp
97 frontend/ir/post_order.h
98 frontend/ir/pred.h
99 frontend/ir/program.cpp
100 frontend/ir/program.h
101 frontend/ir/reg.h
102 frontend/ir/type.cpp
103 frontend/ir/type.h
104 frontend/ir/value.cpp
105 frontend/ir/value.h
106 frontend/maxwell/control_flow.cpp
107 frontend/maxwell/control_flow.h
108 frontend/maxwell/decode.cpp
109 frontend/maxwell/decode.h
110 frontend/maxwell/indirect_branch_table_track.cpp
111 frontend/maxwell/indirect_branch_table_track.h
112 frontend/maxwell/instruction.h
113 frontend/maxwell/location.h
114 frontend/maxwell/maxwell.inc
115 frontend/maxwell/opcodes.cpp
116 frontend/maxwell/opcodes.h
117 frontend/maxwell/structured_control_flow.cpp
118 frontend/maxwell/structured_control_flow.h
119 frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
120 frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
121 frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
122 frontend/maxwell/translate/impl/barrier_operations.cpp
123 frontend/maxwell/translate/impl/bitfield_extract.cpp
124 frontend/maxwell/translate/impl/bitfield_insert.cpp
125 frontend/maxwell/translate/impl/branch_indirect.cpp
126 frontend/maxwell/translate/impl/common_encoding.h
127 frontend/maxwell/translate/impl/common_funcs.cpp
128 frontend/maxwell/translate/impl/common_funcs.h
129 frontend/maxwell/translate/impl/condition_code_set.cpp
130 frontend/maxwell/translate/impl/double_add.cpp
131 frontend/maxwell/translate/impl/double_compare_and_set.cpp
132 frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
133 frontend/maxwell/translate/impl/double_min_max.cpp
134 frontend/maxwell/translate/impl/double_multiply.cpp
135 frontend/maxwell/translate/impl/double_set_predicate.cpp
136 frontend/maxwell/translate/impl/exit_program.cpp
137 frontend/maxwell/translate/impl/find_leading_one.cpp
138 frontend/maxwell/translate/impl/floating_point_add.cpp
139 frontend/maxwell/translate/impl/floating_point_compare.cpp
140 frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
141 frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
142 frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
143 frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
144 frontend/maxwell/translate/impl/floating_point_min_max.cpp
145 frontend/maxwell/translate/impl/floating_point_multi_function.cpp
146 frontend/maxwell/translate/impl/floating_point_multiply.cpp
147 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
148 frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
149 frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
150 frontend/maxwell/translate/impl/half_floating_point_add.cpp
151 frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
152 frontend/maxwell/translate/impl/half_floating_point_helper.cpp
153 frontend/maxwell/translate/impl/half_floating_point_helper.h
154 frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
155 frontend/maxwell/translate/impl/half_floating_point_set.cpp
156 frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
157 frontend/maxwell/translate/impl/impl.cpp
158 frontend/maxwell/translate/impl/impl.h
159 frontend/maxwell/translate/impl/integer_add.cpp
160 frontend/maxwell/translate/impl/integer_add_three_input.cpp
161 frontend/maxwell/translate/impl/integer_compare.cpp
162 frontend/maxwell/translate/impl/integer_compare_and_set.cpp
163 frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
164 frontend/maxwell/translate/impl/integer_funnel_shift.cpp
165 frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
166 frontend/maxwell/translate/impl/integer_popcount.cpp
167 frontend/maxwell/translate/impl/integer_scaled_add.cpp
168 frontend/maxwell/translate/impl/integer_set_predicate.cpp
169 frontend/maxwell/translate/impl/integer_shift_left.cpp
170 frontend/maxwell/translate/impl/integer_shift_right.cpp
171 frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
172 frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
173 frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
174 frontend/maxwell/translate/impl/load_constant.cpp
175 frontend/maxwell/translate/impl/load_constant.h
176 frontend/maxwell/translate/impl/load_effective_address.cpp
177 frontend/maxwell/translate/impl/load_store_attribute.cpp
178 frontend/maxwell/translate/impl/load_store_local_shared.cpp
179 frontend/maxwell/translate/impl/load_store_memory.cpp
180 frontend/maxwell/translate/impl/logic_operation.cpp
181 frontend/maxwell/translate/impl/logic_operation_three_input.cpp
182 frontend/maxwell/translate/impl/move_predicate_to_register.cpp
183 frontend/maxwell/translate/impl/move_register.cpp
184 frontend/maxwell/translate/impl/move_register_to_predicate.cpp
185 frontend/maxwell/translate/impl/move_special_register.cpp
186 frontend/maxwell/translate/impl/not_implemented.cpp
187 frontend/maxwell/translate/impl/output_geometry.cpp
188 frontend/maxwell/translate/impl/pixel_load.cpp
189 frontend/maxwell/translate/impl/predicate_set_predicate.cpp
190 frontend/maxwell/translate/impl/predicate_set_register.cpp
191 frontend/maxwell/translate/impl/select_source_with_predicate.cpp
192 frontend/maxwell/translate/impl/surface_atomic_operations.cpp
193 frontend/maxwell/translate/impl/surface_load_store.cpp
194 frontend/maxwell/translate/impl/texture_fetch.cpp
195 frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
196 frontend/maxwell/translate/impl/texture_gather.cpp
197 frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
198 frontend/maxwell/translate/impl/texture_gradient.cpp
199 frontend/maxwell/translate/impl/texture_load.cpp
200 frontend/maxwell/translate/impl/texture_load_swizzled.cpp
201 frontend/maxwell/translate/impl/texture_mipmap_level.cpp
202 frontend/maxwell/translate/impl/texture_query.cpp
203 frontend/maxwell/translate/impl/video_helper.cpp
204 frontend/maxwell/translate/impl/video_helper.h
205 frontend/maxwell/translate/impl/video_minimum_maximum.cpp
206 frontend/maxwell/translate/impl/video_multiply_add.cpp
207 frontend/maxwell/translate/impl/video_set_predicate.cpp
208 frontend/maxwell/translate/impl/vote.cpp
209 frontend/maxwell/translate/impl/warp_shuffle.cpp
210 frontend/maxwell/translate/translate.cpp
211 frontend/maxwell/translate/translate.h
212 frontend/maxwell/translate_program.cpp
213 frontend/maxwell/translate_program.h
214 host_translate_info.h
215 ir_opt/collect_shader_info_pass.cpp
216 ir_opt/constant_propagation_pass.cpp
217 ir_opt/dead_code_elimination_pass.cpp
218 ir_opt/dual_vertex_pass.cpp
219 ir_opt/global_memory_to_storage_buffer_pass.cpp
220 ir_opt/identity_removal_pass.cpp
221 ir_opt/lower_fp16_to_fp32.cpp
222 ir_opt/lower_int64_to_int32.cpp
223 ir_opt/passes.h
224 ir_opt/ssa_rewrite_pass.cpp
225 ir_opt/texture_pass.cpp
226 ir_opt/verification_pass.cpp
227 object_pool.h
228 profile.h
229 program_header.h
230 runtime_info.h
231 shader_info.h
232 varying_state.h
233)
234
235target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
236
237if (MSVC)
238 target_compile_options(shader_recompiler PRIVATE
239 /W4
240 /WX
241 /we4018 # 'expression' : signed/unsigned mismatch
242 /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
243 /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
244 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
245 /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
246 /we4305 # 'context' : truncation from 'type1' to 'type2'
247 /we4800 # Implicit conversion from 'type' to bool. Possible information loss
248 /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior.
249 )
250else()
251 target_compile_options(shader_recompiler PRIVATE
252 -Werror
253 -Werror=conversion
254 -Werror=ignored-qualifiers
255 -Werror=implicit-fallthrough
256 -Werror=shadow
257 -Werror=sign-compare
258 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
259 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
260 -Werror=unused-variable
261
262 # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6.
263 # And this in turns limits the size of a std::array.
264 $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
265 )
266endif()
267
268create_target_directory_groups(shader_recompiler)
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
new file mode 100644
index 000000000..35503000c
--- /dev/null
+++ b/src/shader_recompiler/backend/bindings.h
@@ -0,0 +1,19 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader::Backend {
10
11struct Bindings {
12 u32 unified{};
13 u32 uniform_buffer{};
14 u32 storage_buffer{};
15 u32 texture{};
16 u32 image{};
17};
18
19} // namespace Shader::Backend
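
For context, Bindings is shared mutable state: the caller hands the same instance to every program it emits, each backend advances the counters as it claims host resource slots, and consecutive stages therefore receive non-overlapping uniform buffer, storage buffer, texture and image bindings. A minimal sketch of that pattern, assuming a hypothetical per-stage bookkeeping struct (StageSlots is not part of the header):

// Sketch only: reuse one Bindings object across a pipeline so slots never collide.
struct StageSlots {
    u32 first_texture{};
    u32 first_image{};
};

StageSlots AssignSlots(Shader::Backend::Bindings& bindings, u32 num_textures, u32 num_images) {
    StageSlots slots{.first_texture = bindings.texture, .first_image = bindings.image};
    bindings.texture += num_textures; // advance the running counters for the next stage
    bindings.image += num_images;
    return slots;
}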
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/bindings.h"
8#include "shader_recompiler/backend/glasm/emit_context.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/profile.h"
11#include "shader_recompiler/runtime_info.h"
12
13namespace Shader::Backend::GLASM {
14namespace {
15std::string_view InterpDecorator(Interpolation interp) {
16 switch (interp) {
17 case Interpolation::Smooth:
18 return "";
19 case Interpolation::Flat:
20 return "FLAT ";
21 case Interpolation::NoPerspective:
22 return "NOPERSPECTIVE ";
23 }
24 throw InvalidArgument("Invalid interpolation {}", interp);
25}
26
27bool IsInputArray(Stage stage) {
28 return stage == Stage::Geometry || stage == Stage::TessellationControl ||
29 stage == Stage::TessellationEval;
30}
31} // Anonymous namespace
32
33EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
34 const RuntimeInfo& runtime_info_)
35 : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
36 // FIXME: Temporary partial implementation
37 u32 cbuf_index{};
38 for (const auto& desc : info.constant_buffer_descriptors) {
39 if (desc.count != 1) {
40 throw NotImplementedException("Constant buffer descriptor array");
41 }
42 Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
43 ++cbuf_index;
44 }
45 u32 ssbo_index{};
46 for (const auto& desc : info.storage_buffers_descriptors) {
47 if (desc.count != 1) {
48 throw NotImplementedException("Storage buffer descriptor array");
49 }
50 if (runtime_info.glasm_use_storage_buffers) {
51 Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
52 ++bindings.storage_buffer;
53 ++ssbo_index;
54 }
55 }
56 if (!runtime_info.glasm_use_storage_buffers) {
57 if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
58 Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
59 }
60 }
61 stage = program.stage;
62 switch (program.stage) {
63 case Stage::VertexA:
64 case Stage::VertexB:
65 stage_name = "vertex";
66 attrib_name = "vertex";
67 break;
68 case Stage::TessellationControl:
69 case Stage::TessellationEval:
70 stage_name = "primitive";
71 attrib_name = "primitive";
72 break;
73 case Stage::Geometry:
74 stage_name = "primitive";
75 attrib_name = "vertex";
76 break;
77 case Stage::Fragment:
78 stage_name = "fragment";
79 attrib_name = "fragment";
80 break;
81 case Stage::Compute:
82 stage_name = "invocation";
83 break;
84 }
85 const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
86 const VaryingState loads{info.loads.mask | info.passthrough.mask};
87 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
88 if (loads.Generic(index)) {
89 Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
90 InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
91 }
92 }
93 if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
94 Add("ATTRIB vertex_position=vertex.position;");
95 }
96 if (info.uses_invocation_id) {
97 Add("ATTRIB primitive_invocation=primitive.invocation;");
98 }
99 if (info.stores_tess_level_outer) {
100 Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
101 }
102 if (info.stores_tess_level_inner) {
103 Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
104 }
105 if (info.stores.ClipDistances()) {
106 Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
107 }
108 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
109 if (!info.uses_patches[index]) {
110 continue;
111 }
112 if (stage == Stage::TessellationControl) {
113 Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
114 "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
115 index, index, index, index, index, index);
116 } else {
117 Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
118 index, index);
119 }
120 }
121 if (stage == Stage::Fragment) {
122 Add("OUTPUT frag_color0=result.color;");
123 for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
124 Add("OUTPUT frag_color{}=result.color[{}];", index, index);
125 }
126 }
127 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
128 if (info.stores.Generic(index)) {
129 Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
130 }
131 }
132 image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
133 for (const auto& desc : info.image_buffer_descriptors) {
134 image_buffer_bindings.push_back(bindings.image);
135 bindings.image += desc.count;
136 }
137 image_bindings.reserve(info.image_descriptors.size());
138 for (const auto& desc : info.image_descriptors) {
139 image_bindings.push_back(bindings.image);
140 bindings.image += desc.count;
141 }
142 texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
143 for (const auto& desc : info.texture_buffer_descriptors) {
144 texture_buffer_bindings.push_back(bindings.texture);
145 bindings.texture += desc.count;
146 }
147 texture_bindings.reserve(info.texture_descriptors.size());
148 for (const auto& desc : info.texture_descriptors) {
149 texture_bindings.push_back(bindings.texture);
150 bindings.texture += desc.count;
151 }
152}
153
154} // namespace Shader::Backend::GLASM
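
The four trailing loops record, for each texture/image descriptor, the first host binding it occupies and then advance the shared counter by desc.count, so an instruction that names descriptor i plus an array offset can later be resolved to a concrete slot. A hedged sketch of that lookup (the helper below is illustrative, not part of the file):

// Illustrative only: map a descriptor index and array offset back to a host binding.
u32 TextureBinding(const EmitContext& ctx, size_t descriptor_index, u32 array_offset) {
    return ctx.texture_bindings[descriptor_index] + array_offset;
}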
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <utility>
9#include <vector>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/backend/glasm/reg_alloc.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader {
17struct Info;
18struct Profile;
19struct RuntimeInfo;
20} // namespace Shader
21
22namespace Shader::Backend {
23struct Bindings;
24}
25
26namespace Shader::IR {
27class Inst;
28struct Program;
29} // namespace Shader::IR
30
31namespace Shader::Backend::GLASM {
32
33class EmitContext {
34public:
35 explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
36 const RuntimeInfo& runtime_info_);
37
38 template <typename... Args>
39 void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
40 code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
41 std::forward<Args>(args)...);
42 // TODO: Remove this
43 code += '\n';
44 }
45
46 template <typename... Args>
47 void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
48 code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
49 std::forward<Args>(args)...);
50 // TODO: Remove this
51 code += '\n';
52 }
53
54 template <typename... Args>
55 void Add(const char* format_str, Args&&... args) {
56 code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
57 // TODO: Remove this
58 code += '\n';
59 }
60
61 std::string code;
62 RegAlloc reg_alloc{};
63 const Info& info;
64 const Profile& profile;
65 const RuntimeInfo& runtime_info;
66
67 std::vector<u32> texture_buffer_bindings;
68 std::vector<u32> image_buffer_bindings;
69 std::vector<u32> texture_bindings;
70 std::vector<u32> image_bindings;
71
72 Stage stage{};
73 std::string_view stage_name = "invalid";
74 std::string_view attrib_name = "invalid";
75
76 u32 num_safety_loop_vars{};
77 bool uses_y_direction{};
78};
79
80} // namespace Shader::Backend::GLASM
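
The Add overloads are thin fmt wrappers: the variants taking an IR::Inst& ask the register allocator to define (or long-define) a destination register for that instruction and substitute it as the first format argument, while the instruction-less overload appends raw code. A usage sketch under those assumptions, in the style of the per-opcode emitters:

// Sketch: emits "ADD.S Rn.x,a,b;" where Rn is whatever register the allocator
// assigns to `inst`; a and b were already consumed from the instruction's operands.
void EmitIAdd32Sketch(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
    ctx.Add("ADD.S {}.x,{},{};", inst, a, b);
}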
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/div_ceil.h"
10#include "common/settings.h"
11#include "shader_recompiler/backend/bindings.h"
12#include "shader_recompiler/backend/glasm/emit_context.h"
13#include "shader_recompiler/backend/glasm/emit_glasm.h"
14#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
15#include "shader_recompiler/frontend/ir/ir_emitter.h"
16#include "shader_recompiler/frontend/ir/program.h"
17#include "shader_recompiler/profile.h"
18#include "shader_recompiler/runtime_info.h"
19
20namespace Shader::Backend::GLASM {
21namespace {
22template <class Func>
23struct FuncTraits {};
24
25template <class ReturnType_, class... Args>
26struct FuncTraits<ReturnType_ (*)(Args...)> {
27 using ReturnType = ReturnType_;
28
29 static constexpr size_t NUM_ARGS = sizeof...(Args);
30
31 template <size_t I>
32 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
33};
34
35template <typename T>
36struct Identity {
37 Identity(T data_) : data{data_} {}
38
39 T Extract() {
40 return data;
41 }
42
43 T data;
44};
45
46template <bool scalar>
47class RegWrapper {
48public:
49 RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
50 const Value value{reg_alloc.Peek(ir_value)};
51 if (value.type == Type::Register) {
52 inst = ir_value.InstRecursive();
53 reg = Register{value};
54 } else {
55 reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
56 }
57 switch (value.type) {
58 case Type::Register:
59 case Type::Void:
60 break;
61 case Type::U32:
62 ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
63 break;
64 case Type::U64:
65 ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
66 break;
67 }
68 }
69
70 auto Extract() {
71 if (inst) {
72 reg_alloc.Unref(*inst);
73 } else {
74 reg_alloc.FreeReg(reg);
75 }
76 return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
77 }
78
79private:
80 RegAlloc& reg_alloc;
81 IR::Inst* inst{};
82 Register reg{};
83};
84
85template <typename ArgType>
86class ValueWrapper {
87public:
88 ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
89 : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
90
91 ArgType Extract() {
92 if (!ir_value.IsImmediate()) {
93 reg_alloc.Unref(*ir_value.InstRecursive());
94 }
95 return value;
96 }
97
98private:
99 RegAlloc& reg_alloc;
100 const IR::Value& ir_value;
101 ArgType value;
102};
103
104template <typename ArgType>
105auto Arg(EmitContext& ctx, const IR::Value& arg) {
106 if constexpr (std::is_same_v<ArgType, Register>) {
107 return RegWrapper<false>{ctx, arg};
108 } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
109 return RegWrapper<true>{ctx, arg};
110 } else if constexpr (std::is_base_of_v<Value, ArgType>) {
111 return ValueWrapper<ArgType>{ctx, arg};
112 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
113 return Identity<const IR::Value&>{arg};
114 } else if constexpr (std::is_same_v<ArgType, u32>) {
115 return Identity{arg.U32()};
116 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
117 return Identity{arg.Attribute()};
118 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
119 return Identity{arg.Patch()};
120 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
121 return Identity{arg.Reg()};
122 }
123}
124
125template <auto func, bool is_first_arg_inst>
126struct InvokeCall {
127 template <typename... Args>
128 InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
129 if constexpr (is_first_arg_inst) {
130 func(ctx, *inst, args.Extract()...);
131 } else {
132 func(ctx, args.Extract()...);
133 }
134 }
135};
136
137template <auto func, bool is_first_arg_inst, size_t... I>
138void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
139 using Traits = FuncTraits<decltype(func)>;
140 if constexpr (is_first_arg_inst) {
141 InvokeCall<func, is_first_arg_inst>{
142 ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
143 } else {
144 InvokeCall<func, is_first_arg_inst>{
145 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
146 }
147}
148
149template <auto func>
150void Invoke(EmitContext& ctx, IR::Inst* inst) {
151 using Traits = FuncTraits<decltype(func)>;
152 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
153 if constexpr (Traits::NUM_ARGS == 1) {
154 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
155 } else {
156 using FirstArgType = typename Traits::template ArgType<1>;
157 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
158 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
159 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
160 }
161}
162
163void EmitInst(EmitContext& ctx, IR::Inst* inst) {
164 switch (inst->GetOpcode()) {
165#define OPCODE(name, result_type, ...) \
166 case IR::Opcode::name: \
167 return Invoke<&Emit##name>(ctx, inst);
168#include "shader_recompiler/frontend/ir/opcodes.inc"
169#undef OPCODE
170 }
171 throw LogicError("Invalid opcode {}", inst->GetOpcode());
172}
173
174bool IsReference(IR::Inst& inst) {
175 return inst.GetOpcode() == IR::Opcode::Reference;
176}
177
178void PrecolorInst(IR::Inst& phi) {
179    // Insert phi moves before references to avoid overwriting other phis
180 const size_t num_args{phi.NumArgs()};
181 for (size_t i = 0; i < num_args; ++i) {
182 IR::Block& phi_block{*phi.PhiBlock(i)};
183 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
184 IR::IREmitter ir{phi_block, it};
185 const IR::Value arg{phi.Arg(i)};
186 if (arg.IsImmediate()) {
187 ir.PhiMove(phi, arg);
188 } else {
189 ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
190 }
191 }
192 for (size_t i = 0; i < num_args; ++i) {
193 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
194 }
195}
196
197void Precolor(const IR::Program& program) {
198 for (IR::Block* const block : program.blocks) {
199 for (IR::Inst& phi : block->Instructions()) {
200 if (!IR::IsPhi(phi)) {
201 break;
202 }
203 PrecolorInst(phi);
204 }
205 }
206}
207
208void EmitCode(EmitContext& ctx, const IR::Program& program) {
209 const auto eval{
210 [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
211 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
212 switch (node.type) {
213 case IR::AbstractSyntaxNode::Type::Block:
214 for (IR::Inst& inst : node.data.block->Instructions()) {
215 EmitInst(ctx, &inst);
216 }
217 break;
218 case IR::AbstractSyntaxNode::Type::If:
219 ctx.Add("MOV.S.CC RC,{};"
220 "IF NE.x;",
221 eval(node.data.if_node.cond));
222 break;
223 case IR::AbstractSyntaxNode::Type::EndIf:
224 ctx.Add("ENDIF;");
225 break;
226 case IR::AbstractSyntaxNode::Type::Loop:
227 ctx.Add("REP;");
228 break;
229 case IR::AbstractSyntaxNode::Type::Repeat:
230 if (!Settings::values.disable_shader_loop_safety_checks) {
231 const u32 loop_index{ctx.num_safety_loop_vars++};
232 const u32 vector_index{loop_index / 4};
233 const char component{"xyzw"[loop_index % 4]};
234 ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
235 "BRK(LT.{});",
236 vector_index, component, vector_index, component, component);
237 }
238 if (node.data.repeat.cond.IsImmediate()) {
239 if (node.data.repeat.cond.U1()) {
240 ctx.Add("ENDREP;");
241 } else {
242 ctx.Add("BRK;"
243 "ENDREP;");
244 }
245 } else {
246 ctx.Add("MOV.S.CC RC,{};"
247 "BRK(EQ.x);"
248 "ENDREP;",
249 eval(node.data.repeat.cond));
250 }
251 break;
252 case IR::AbstractSyntaxNode::Type::Break:
253 if (node.data.break_node.cond.IsImmediate()) {
254 if (node.data.break_node.cond.U1()) {
255 ctx.Add("BRK;");
256 }
257 } else {
258 ctx.Add("MOV.S.CC RC,{};"
259 "BRK (NE.x);",
260 eval(node.data.break_node.cond));
261 }
262 break;
263 case IR::AbstractSyntaxNode::Type::Return:
264 case IR::AbstractSyntaxNode::Type::Unreachable:
265 ctx.Add("RET;");
266 break;
267 }
268 }
269 if (!ctx.reg_alloc.IsEmpty()) {
270 LOG_WARNING(Shader_GLASM, "Register leak after generating code");
271 }
272}
273
274void SetupOptions(const IR::Program& program, const Profile& profile,
275 const RuntimeInfo& runtime_info, std::string& header) {
276 const Info& info{program.info};
277 const Stage stage{program.stage};
278
279 // TODO: Track the shared atomic ops
280 header += "OPTION NV_internal;"
281 "OPTION NV_shader_storage_buffer;"
282 "OPTION NV_gpu_program_fp64;";
283 if (info.uses_int64_bit_atomics) {
284 header += "OPTION NV_shader_atomic_int64;";
285 }
286 if (info.uses_atomic_f32_add) {
287 header += "OPTION NV_shader_atomic_float;";
288 }
289 if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
290 header += "OPTION NV_shader_atomic_fp16_vector;";
291 }
292 if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
293 info.uses_fswzadd) {
294 header += "OPTION NV_shader_thread_group;";
295 }
296 if (info.uses_subgroup_shuffles) {
297 header += "OPTION NV_shader_thread_shuffle;";
298 }
299 if (info.uses_sparse_residency) {
300 header += "OPTION EXT_sparse_texture2;";
301 }
302 const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
303 info.stores[IR::Attribute::Layer]};
304 if ((stage != Stage::Geometry && stores_viewport_layer) ||
305 info.stores[IR::Attribute::ViewportMask]) {
306 if (profile.support_viewport_index_layer_non_geometry) {
307 header += "OPTION NV_viewport_array2;";
308 }
309 }
310 if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
311 header += "OPTION NV_geometry_shader_passthrough;";
312 }
313 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
314 header += "OPTION EXT_shader_image_load_formatted;";
315 }
316 if (profile.support_derivative_control) {
317 header += "OPTION ARB_derivative_control;";
318 }
319 if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
320 header += "OPTION NV_early_fragment_tests;";
321 }
322 if (stage == Stage::Fragment) {
323 header += "OPTION ARB_draw_buffers;";
324 }
325}
326
327std::string_view StageHeader(Stage stage) {
328 switch (stage) {
329 case Stage::VertexA:
330 case Stage::VertexB:
331 return "!!NVvp5.0\n";
332 case Stage::TessellationControl:
333 return "!!NVtcp5.0\n";
334 case Stage::TessellationEval:
335 return "!!NVtep5.0\n";
336 case Stage::Geometry:
337 return "!!NVgp5.0\n";
338 case Stage::Fragment:
339 return "!!NVfp5.0\n";
340 case Stage::Compute:
341 return "!!NVcp5.0\n";
342 }
343 throw InvalidArgument("Invalid stage {}", stage);
344}
345
346std::string_view InputPrimitive(InputTopology topology) {
347 switch (topology) {
348 case InputTopology::Points:
349 return "POINTS";
350 case InputTopology::Lines:
351 return "LINES";
352 case InputTopology::LinesAdjacency:
353        return "LINES_ADJACENCY";
354 case InputTopology::Triangles:
355 return "TRIANGLES";
356 case InputTopology::TrianglesAdjacency:
357 return "TRIANGLES_ADJACENCY";
358 }
359 throw InvalidArgument("Invalid input topology {}", topology);
360}
361
362std::string_view OutputPrimitive(OutputTopology topology) {
363 switch (topology) {
364 case OutputTopology::PointList:
365 return "POINTS";
366 case OutputTopology::LineStrip:
367 return "LINE_STRIP";
368 case OutputTopology::TriangleStrip:
369 return "TRIANGLE_STRIP";
370 }
371 throw InvalidArgument("Invalid output topology {}", topology);
372}
373
374std::string_view GetTessMode(TessPrimitive primitive) {
375 switch (primitive) {
376 case TessPrimitive::Triangles:
377 return "TRIANGLES";
378 case TessPrimitive::Quads:
379 return "QUADS";
380 case TessPrimitive::Isolines:
381 return "ISOLINES";
382 }
383 throw InvalidArgument("Invalid tessellation primitive {}", primitive);
384}
385
386std::string_view GetTessSpacing(TessSpacing spacing) {
387 switch (spacing) {
388 case TessSpacing::Equal:
389 return "EQUAL";
390 case TessSpacing::FractionalOdd:
391 return "FRACTIONAL_ODD";
392 case TessSpacing::FractionalEven:
393 return "FRACTIONAL_EVEN";
394 }
395 throw InvalidArgument("Invalid tessellation spacing {}", spacing);
396}
397} // Anonymous namespace
398
399std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
400 Bindings& bindings) {
401 EmitContext ctx{program, bindings, profile, runtime_info};
402 Precolor(program);
403 EmitCode(ctx, program);
404 std::string header{StageHeader(program.stage)};
405 SetupOptions(program, profile, runtime_info, header);
406 switch (program.stage) {
407 case Stage::TessellationControl:
408 header += fmt::format("VERTICES_OUT {};", program.invocations);
409 break;
410 case Stage::TessellationEval:
411 header += fmt::format("TESS_MODE {};"
412 "TESS_SPACING {};"
413 "TESS_VERTEX_ORDER {};",
414 GetTessMode(runtime_info.tess_primitive),
415 GetTessSpacing(runtime_info.tess_spacing),
416 runtime_info.tess_clockwise ? "CW" : "CCW");
417 break;
418 case Stage::Geometry:
419 header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
420 if (program.is_geometry_passthrough) {
421 if (profile.support_geometry_shader_passthrough) {
422 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
423 if (program.info.passthrough.Generic(index)) {
424 header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
425 }
426 }
427 if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
428 header += "PASSTHROUGH result.position;";
429 }
430 } else {
431 LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
432 }
433 } else {
434 header +=
435 fmt::format("VERTICES_OUT {};"
436 "PRIMITIVE_OUT {};",
437 program.output_vertices, OutputPrimitive(program.output_topology));
438 }
439 break;
440 case Stage::Compute:
441 header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
442 program.workgroup_size[1], program.workgroup_size[2]);
443 break;
444 default:
445 break;
446 }
447 if (program.shared_memory_size > 0) {
448 header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
449 header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
450 }
451 header += "TEMP ";
452 for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
453 header += fmt::format("R{},", index);
454 }
455 if (program.local_memory_size > 0) {
456 header += fmt::format("lmem[{}],", program.local_memory_size);
457 }
458 if (program.info.uses_fswzadd) {
459 header += "FSWZA[4],FSWZB[4],";
460 }
461 const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
462 for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
463 header += fmt::format("loop{},", index);
464 }
465 header += "RC;"
466 "LONG TEMP ";
467 for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
468 header += fmt::format("D{},", index);
469 }
470 header += "DC;";
471 if (program.info.uses_fswzadd) {
472 header += "MOV.F FSWZA[0],-1;"
473 "MOV.F FSWZA[1],1;"
474 "MOV.F FSWZA[2],-1;"
475 "MOV.F FSWZA[3],0;"
476 "MOV.F FSWZB[0],-1;"
477 "MOV.F FSWZB[1],-1;"
478 "MOV.F FSWZB[2],1;"
479 "MOV.F FSWZB[3],-1;";
480 }
481 for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
482 header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
483 }
484 if (ctx.uses_y_direction) {
485 header += "PARAM y_direction[1]={state.material.front.ambient};";
486 }
487 ctx.code.insert(0, header);
488 ctx.code += "END";
489 return ctx.code;
490}
491
492} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include "shader_recompiler/backend/bindings.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/profile.h"
12#include "shader_recompiler/runtime_info.h"
13
14namespace Shader::Backend::GLASM {
15
16[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
17 IR::Program& program, Bindings& bindings);
18
19[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
20 IR::Program& program) {
21 Bindings binding;
22 return EmitGLASM(profile, runtime_info, program, binding);
23}
24
25} // namespace Shader::Backend::GLASM
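
A short usage sketch of the entry point, assuming the caller already has a translated IR::Program plus a populated Profile and RuntimeInfo (variable names here are illustrative):

// Single program: the two-argument overload default-constructs Bindings,
// so resource slots for this program start at zero.
const std::string code{Shader::Backend::GLASM::EmitGLASM(profile, runtime_info, program)};

// Several stages in one pipeline: reuse one Bindings instance so the stages
// are assigned non-overlapping texture/image/buffer slots.
Shader::Backend::Bindings bindings;
const std::string vs{Shader::Backend::GLASM::EmitGLASM(profile, runtime_info, vertex_program, bindings)};
const std::string fs{Shader::Backend::GLASM::EmitGLASM(profile, runtime_info, fragment_program, bindings)};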
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLASM {
10
11static void Alias(IR::Inst& inst, const IR::Value& value) {
12 if (value.IsImmediate()) {
13 return;
14 }
15 IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
16 value_inst.DestructiveAddUsage(inst.UseCount());
17 value_inst.DestructiveRemoveUsage();
18 inst.SetDefinition(value_inst.Definition<Id>());
19}
20
21void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
22 Alias(inst, value);
23}
24
25void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
26 // Fake one usage to get a real register out of the condition
27 inst.DestructiveAddUsage(1);
28 const Register ret{ctx.reg_alloc.Define(inst)};
29 const ScalarS32 input{ctx.reg_alloc.Consume(value)};
30 if (ret != input) {
31 ctx.Add("MOV.S {},{};", ret, input);
32 }
33}
34
35void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
36 Alias(inst, value);
37}
38
39void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
40 Alias(inst, value);
41}
42
43void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
44 Alias(inst, value);
45}
46
47void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
48 Alias(inst, value);
49}
50
51void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
52 Alias(inst, value);
53}
54
55void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
56 Alias(inst, value);
57}
58
59void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
60 ctx.LongAdd("PK64.U {}.x,{};", inst, value);
61}
62
63void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
64 ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
65}
66
67void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
68 throw NotImplementedException("GLASM instruction");
69}
70
71void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
72 throw NotImplementedException("GLASM instruction");
73}
74
75void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
76 ctx.Add("PK2H {}.x,{};", inst, value);
77}
78
79void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
80 ctx.Add("UP2H {}.xy,{}.x;", inst, value);
81}
82
83void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
84 ctx.LongAdd("PK64 {}.x,{};", inst, value);
85}
86
87void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
88 ctx.Add("UP64 {}.xy,{}.x;", inst, value);
89}
90
91} // namespace Shader::Backend::GLASM
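
Note that Alias makes every same-width bit cast free: no GLASM is emitted, the cast's use count is transferred to the producing instruction, and the cast inherits its register definition, so later reads resolve to the original register. Only conversions that actually repack bits (the PK*/UP* opcodes above) cost an instruction. Any future opcode whose result is bit-identical to its operand could follow the same pattern; a hedged one-liner, assuming such an opcode existed:

// Hypothetical: a 32-bit signed/unsigned reinterpretation would also be a pure alias.
void EmitBitCastS32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}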
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLASM {
10namespace {
11template <auto read_imm, char type, typename... Values>
12void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
13 const Register ret{ctx.reg_alloc.Define(inst)};
14 if (std::ranges::any_of(std::array{elements...},
15 [](const IR::Value& value) { return value.IsImmediate(); })) {
16 using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
17 const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
18 ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
19 fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
20 }
21 size_t index{};
22 for (const IR::Value& element : {elements...}) {
23 if (!element.IsImmediate()) {
24 const ScalarU32 value{ctx.reg_alloc.Consume(element)};
25 ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
26 }
27 ++index;
28 }
29}
30
31void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
32 const Register ret{ctx.reg_alloc.Define(inst)};
33 if (ret == composite && index == 0) {
34 // No need to do anything here, the source and destination are the same register
35 return;
36 }
37 ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
38}
39
40template <typename ObjectType>
41void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
42 u32 index, char type) {
43 const Register ret{ctx.reg_alloc.Define(inst)};
44 const char swizzle{"xyzw"[index]};
45 if (ret != composite && ret == object) {
46 // The object is aliased with the return value, so we have to use a temporary to insert
47 ctx.Add("MOV.{} RC,{};"
48 "MOV.{} RC.{},{};"
49 "MOV.{} {},RC;",
50 type, composite, type, swizzle, object, type, ret);
51 } else if (ret != composite) {
52        // The input composite is not aliased with the return value, so copy it into the
53        // destination first. The inserted object is not aliased with the return value
54        // either, so overwriting one component afterwards is safe.
55 ctx.Add("MOV.{} {},{};"
56 "MOV.{} {}.{},{};",
57 type, ret, composite, type, ret, swizzle, object);
58 } else {
59        // The return value aliases the composite, so the object can be inserted directly;
60        // whether the object is also aliased does not matter.
61 ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
62 }
63}
64} // Anonymous namespace
65
66void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
67 const IR::Value& e2) {
68 CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
69}
70
71void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
72 const IR::Value& e2, const IR::Value& e3) {
73 CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
74}
75
76void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
77 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
78 CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
79}
80
81void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
82 CompositeExtract(ctx, inst, composite, index, 'U');
83}
84
85void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
86 CompositeExtract(ctx, inst, composite, index, 'U');
87}
88
89void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
90 CompositeExtract(ctx, inst, composite, index, 'U');
91}
92
93void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
94 [[maybe_unused]] Register composite,
95 [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
96 throw NotImplementedException("GLASM instruction");
97}
98
99void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
100 [[maybe_unused]] Register composite,
101 [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
102 throw NotImplementedException("GLASM instruction");
103}
104
105void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
106 [[maybe_unused]] Register composite,
107 [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
108 throw NotImplementedException("GLASM instruction");
109}
110
111void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
112 [[maybe_unused]] Register e2) {
113 throw NotImplementedException("GLASM instruction");
114}
115
116void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
117 [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
118 throw NotImplementedException("GLASM instruction");
119}
120
121void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
122 [[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
123 [[maybe_unused]] Register e4) {
124 throw NotImplementedException("GLASM instruction");
125}
126
127void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
128 [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
129 throw NotImplementedException("GLASM instruction");
130}
131
132void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
133 [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
134 throw NotImplementedException("GLASM instruction");
135}
136
137void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
138 [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
139 throw NotImplementedException("GLASM instruction");
140}
141
142void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
143 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
144 [[maybe_unused]] u32 index) {
145 throw NotImplementedException("GLASM instruction");
146}
147
148void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
149 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
150 [[maybe_unused]] u32 index) {
151 throw NotImplementedException("GLASM instruction");
152}
153
154void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
155 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
156 [[maybe_unused]] u32 index) {
157 throw NotImplementedException("GLASM instruction");
158}
159
160void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
161 const IR::Value& e2) {
162 CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
163}
164
165void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
166 const IR::Value& e2, const IR::Value& e3) {
167 CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
168}
169
170void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
171 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
172 CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
173}
174
175void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
176 CompositeExtract(ctx, inst, composite, index, 'F');
177}
178
179void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
180 CompositeExtract(ctx, inst, composite, index, 'F');
181}
182
183void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
184 CompositeExtract(ctx, inst, composite, index, 'F');
185}
186
187void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
188 ScalarF32 object, u32 index) {
189 CompositeInsert(ctx, inst, composite, object, index, 'F');
190}
191
192void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
193 ScalarF32 object, u32 index) {
194 CompositeInsert(ctx, inst, composite, object, index, 'F');
195}
196
197void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
198 ScalarF32 object, u32 index) {
199 CompositeInsert(ctx, inst, composite, object, index, 'F');
200}
201
202void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
203 throw NotImplementedException("GLASM instruction");
204}
205
206void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
207 throw NotImplementedException("GLASM instruction");
208}
209
210void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
211 throw NotImplementedException("GLASM instruction");
212}
213
214void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
215 throw NotImplementedException("GLASM instruction");
216}
217
218void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
219 throw NotImplementedException("GLASM instruction");
220}
221
222void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
223 throw NotImplementedException("GLASM instruction");
224}
225
226void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
227 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
228 [[maybe_unused]] u32 index) {
229 throw NotImplementedException("GLASM instruction");
230}
231
232void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
233 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
234 [[maybe_unused]] u32 index) {
235 throw NotImplementedException("GLASM instruction");
236}
237
238void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
239 [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
240 [[maybe_unused]] u32 index) {
241 throw NotImplementedException("GLASM instruction");
242}
243
244} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
new file mode 100644
index 000000000..02c9dc6d7
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -0,0 +1,346 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11#include "shader_recompiler/shader_info.h"
12
13namespace Shader::Backend::GLASM {
14namespace {
15void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
16 std::string_view size) {
17 if (!binding.IsImmediate()) {
18 throw NotImplementedException("Indirect constant buffer loading");
19 }
20 const Register ret{ctx.reg_alloc.Define(inst)};
21 if (offset.type == Type::U32) {
22 // Avoid reading arrays out of bounds, matching hardware's behavior
23 if (offset.imm_u32 >= 0x10'000) {
24 ctx.Add("MOV.S {},0;", ret);
25 return;
26 }
27 }
28 ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
29}
30
31bool IsInputArray(Stage stage) {
32 return stage == Stage::Geometry || stage == Stage::TessellationControl ||
33 stage == Stage::TessellationEval;
34}
35
36std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
37 return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
38}
39
40u32 TexCoordIndex(IR::Attribute attr) {
41 return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
42}
43} // Anonymous namespace
44
45void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
46 GetCbuf(ctx, inst, binding, offset, "U8");
47}
48
49void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
50 GetCbuf(ctx, inst, binding, offset, "S8");
51}
52
53void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
54 GetCbuf(ctx, inst, binding, offset, "U16");
55}
56
57void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
58 GetCbuf(ctx, inst, binding, offset, "S16");
59}
60
61void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
62 GetCbuf(ctx, inst, binding, offset, "U32");
63}
64
65void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
66 GetCbuf(ctx, inst, binding, offset, "F32");
67}
68
69void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
70 ScalarU32 offset) {
71 GetCbuf(ctx, inst, binding, offset, "U32X2");
72}
73
74void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
75 const u32 element{static_cast<u32>(attr) % 4};
76 const char swizzle{"xyzw"[element]};
77 if (IR::IsGeneric(attr)) {
78 const u32 index{IR::GenericAttributeIndex(attr)};
79 ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
80 return;
81 }
82 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
83 const u32 index{TexCoordIndex(attr)};
84 ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
85 return;
86 }
87 switch (attr) {
88 case IR::Attribute::PrimitiveId:
89 ctx.Add("MOV.S {}.x,primitive.id;", inst);
90 break;
91 case IR::Attribute::PositionX:
92 case IR::Attribute::PositionY:
93 case IR::Attribute::PositionZ:
94 case IR::Attribute::PositionW:
95 if (IsInputArray(ctx.stage)) {
96 ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
97 } else {
98 ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
99 }
100 break;
101 case IR::Attribute::ColorFrontDiffuseR:
102 case IR::Attribute::ColorFrontDiffuseG:
103 case IR::Attribute::ColorFrontDiffuseB:
104 case IR::Attribute::ColorFrontDiffuseA:
105 ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
106 break;
107 case IR::Attribute::PointSpriteS:
108 case IR::Attribute::PointSpriteT:
109 ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
110 break;
111 case IR::Attribute::TessellationEvaluationPointU:
112 case IR::Attribute::TessellationEvaluationPointV:
113 ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
114 break;
115 case IR::Attribute::InstanceId:
116 ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
117 break;
118 case IR::Attribute::VertexId:
119 ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
120 break;
121 case IR::Attribute::FrontFace:
122 ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
123 break;
124 default:
125 throw NotImplementedException("Get attribute {}", attr);
126 }
127}
128
129void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
130 [[maybe_unused]] ScalarU32 vertex) {
131 const u32 element{static_cast<u32>(attr) % 4};
132 const char swizzle{"xyzw"[element]};
133 if (IR::IsGeneric(attr)) {
134 const u32 index{IR::GenericAttributeIndex(attr)};
135 ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
136 return;
137 }
138 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) {
139 const u32 index{TexCoordIndex(attr)};
140 ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value);
141 return;
142 }
143 switch (attr) {
144 case IR::Attribute::Layer:
145 if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
146 ctx.Add("MOV.F result.layer.x,{};", value);
147 } else {
148 LOG_WARNING(Shader_GLASM,
149 "Layer stored outside of geometry shader not supported by device");
150 }
151 break;
152 case IR::Attribute::ViewportIndex:
153 if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
154 ctx.Add("MOV.F result.viewport.x,{};", value);
155 } else {
156 LOG_WARNING(Shader_GLASM,
157 "Viewport stored outside of geometry shader not supported by device");
158 }
159 break;
160 case IR::Attribute::ViewportMask:
161 // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage.
162 if (ctx.profile.support_viewport_index_layer_non_geometry) {
163 ctx.Add("MOV.F result.viewportmask[0].x,{};", value);
164 } else {
165 LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask");
166 }
167 break;
168 case IR::Attribute::PointSize:
169 ctx.Add("MOV.F result.pointsize.x,{};", value);
170 break;
171 case IR::Attribute::PositionX:
172 case IR::Attribute::PositionY:
173 case IR::Attribute::PositionZ:
174 case IR::Attribute::PositionW:
175 ctx.Add("MOV.F result.position.{},{};", swizzle, value);
176 break;
177 case IR::Attribute::ColorFrontDiffuseR:
178 case IR::Attribute::ColorFrontDiffuseG:
179 case IR::Attribute::ColorFrontDiffuseB:
180 case IR::Attribute::ColorFrontDiffuseA:
181 ctx.Add("MOV.F result.color.{},{};", swizzle, value);
182 break;
183 case IR::Attribute::ColorFrontSpecularR:
184 case IR::Attribute::ColorFrontSpecularG:
185 case IR::Attribute::ColorFrontSpecularB:
186 case IR::Attribute::ColorFrontSpecularA:
187 ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value);
188 break;
189 case IR::Attribute::ColorBackDiffuseR:
190 case IR::Attribute::ColorBackDiffuseG:
191 case IR::Attribute::ColorBackDiffuseB:
192 case IR::Attribute::ColorBackDiffuseA:
193 ctx.Add("MOV.F result.color.back.{},{};", swizzle, value);
194 break;
195 case IR::Attribute::ColorBackSpecularR:
196 case IR::Attribute::ColorBackSpecularG:
197 case IR::Attribute::ColorBackSpecularB:
198 case IR::Attribute::ColorBackSpecularA:
199 ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value);
200 break;
201 case IR::Attribute::FogCoordinate:
202 ctx.Add("MOV.F result.fogcoord.x,{};", value);
203 break;
204 case IR::Attribute::ClipDistance0:
205 case IR::Attribute::ClipDistance1:
206 case IR::Attribute::ClipDistance2:
207 case IR::Attribute::ClipDistance3:
208 case IR::Attribute::ClipDistance4:
209 case IR::Attribute::ClipDistance5:
210 case IR::Attribute::ClipDistance6:
211 case IR::Attribute::ClipDistance7: {
212 const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
213 ctx.Add("MOV.F result.clip[{}].x,{};", index, value);
214 break;
215 }
216 default:
217 throw NotImplementedException("Set attribute {}", attr);
218 }
219}
220
221void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
222 // RC.x = base_index
223 // RC.y = masked_index
224 // RC.z = compare_index
225 ctx.Add("SHR.S RC.x,{},2;"
226 "AND.S RC.y,RC.x,3;"
227 "SHR.S RC.z,{},4;",
228 offset, offset);
229
230 const Register ret{ctx.reg_alloc.Define(inst)};
231 u32 num_endifs{};
232 const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
233 ++num_endifs;
234 ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
235 "IF NE.w;"
236 // X
237 "SEQ.S.CC RC.w,RC.y,0;"
238 "IF NE.w;"
239 "MOV {}.x,{};"
240 "ELSE;"
241 // Y
242 "SEQ.S.CC RC.w,RC.y,1;"
243 "IF NE.w;"
244 "MOV {}.x,{};"
245 "ELSE;"
246 // Z
247 "SEQ.S.CC RC.w,RC.y,2;"
248 "IF NE.w;"
249 "MOV {}.x,{};"
250 "ELSE;"
251 // W
252 "MOV {}.x,{};"
253 "ENDIF;"
254 "ENDIF;"
255 "ENDIF;"
256 "ELSE;",
257 compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
258 }};
259 const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
260 const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
261 fmt::format("{}.z", value), fmt::format("{}.w", value)};
262 read(compare_index, values);
263 }};
264 if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) {
265 const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
266 if (IsInputArray(ctx.stage)) {
267 read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
268 } else {
269 read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
270 }
271 }
272 for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
273 if (!ctx.info.loads.Generic(index)) {
274 continue;
275 }
276 read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
277 }
278 for (u32 i = 0; i < num_endifs; ++i) {
279 ctx.Add("ENDIF;");
280 }
281}
282
283void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
284 [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) {
285 throw NotImplementedException("GLASM instruction");
286}
287
288void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
289 if (!IR::IsGeneric(patch)) {
290 throw NotImplementedException("Non-generic patch load");
291 }
292 const u32 index{IR::GenericPatchIndex(patch)};
293 const u32 element{IR::GenericPatchElement(patch)};
294 const char swizzle{"xyzw"[element]};
295 const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""};
296 ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle);
297}
298
299void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) {
300 if (IR::IsGeneric(patch)) {
301 const u32 index{IR::GenericPatchIndex(patch)};
302 const u32 element{IR::GenericPatchElement(patch)};
303 ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value);
304 return;
305 }
306 switch (patch) {
307 case IR::Patch::TessellationLodLeft:
308 case IR::Patch::TessellationLodRight:
309 case IR::Patch::TessellationLodTop:
310 case IR::Patch::TessellationLodBottom: {
311 const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
312 ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value);
313 break;
314 }
315 case IR::Patch::TessellationLodInteriorU:
316 ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value);
317 break;
318 case IR::Patch::TessellationLodInteriorV:
319 ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value);
320 break;
321 default:
322 throw NotImplementedException("Patch {}", patch);
323 }
324}
325
326void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) {
327 ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value);
328}
329
330void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) {
331 ctx.Add("MOV.S result.samplemask.x,{};", value);
332}
333
334void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) {
335 ctx.Add("MOV.F result.depth.z,{};", value);
336}
337
338void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) {
339 ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset);
340}
341
342void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) {
343 ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value);
344}
345
346} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
new file mode 100644
index 000000000..ccdf1cbc8
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
@@ -0,0 +1,231 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLASM {
13namespace {
14std::string_view FpRounding(IR::FpRounding fp_rounding) {
15 switch (fp_rounding) {
16 case IR::FpRounding::DontCare:
17 return "";
18 case IR::FpRounding::RN:
19 return ".ROUND";
20 case IR::FpRounding::RZ:
21 return ".TRUNC";
22 case IR::FpRounding::RM:
23 return ".FLR";
24 case IR::FpRounding::RP:
25 return ".CEIL";
26 }
27 throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
28}
29
30template <typename InputType>
31void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
32 std::string_view src, bool is_long_result) {
33 const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
34 const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
35 ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
36}
37} // Anonymous namespace
38
39void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
40 Convert(ctx, inst, value, "S16", "F16", false);
41}
42
43void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
44 Convert(ctx, inst, value, "S16", "F32", false);
45}
46
47void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
48 Convert(ctx, inst, value, "S16", "F64", false);
49}
50
51void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
52 Convert(ctx, inst, value, "S32", "F16", false);
53}
54
55void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
56 Convert(ctx, inst, value, "S32", "F32", false);
57}
58
59void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
60 Convert(ctx, inst, value, "S32", "F64", false);
61}
62
63void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
64 Convert(ctx, inst, value, "S64", "F16", true);
65}
66
67void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
68 Convert(ctx, inst, value, "S64", "F32", true);
69}
70
71void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
72 Convert(ctx, inst, value, "S64", "F64", true);
73}
74
75void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
76 Convert(ctx, inst, value, "U16", "F16", false);
77}
78
79void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
80 Convert(ctx, inst, value, "U16", "F32", false);
81}
82
83void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
84 Convert(ctx, inst, value, "U16", "F64", false);
85}
86
87void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
88 Convert(ctx, inst, value, "U32", "F16", false);
89}
90
91void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
92 Convert(ctx, inst, value, "U32", "F32", false);
93}
94
95void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
96 Convert(ctx, inst, value, "U32", "F64", false);
97}
98
99void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
100 Convert(ctx, inst, value, "U64", "F16", true);
101}
102
103void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
104 Convert(ctx, inst, value, "U64", "F32", true);
105}
106
107void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
108 Convert(ctx, inst, value, "U64", "F64", true);
109}
110
111void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
112 Convert(ctx, inst, value, "U64", "U32", true);
113}
114
115void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
116 Convert(ctx, inst, value, "U32", "U64", false);
117}
118
119void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
120 Convert(ctx, inst, value, "F16", "F32", false);
121}
122
123void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
124 Convert(ctx, inst, value, "F32", "F16", false);
125}
126
127void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
128 Convert(ctx, inst, value, "F32", "F64", false);
129}
130
131void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
132 Convert(ctx, inst, value, "F64", "F32", true);
133}
134
135void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) {
136 Convert(ctx, inst, value, "F16", "S8", false);
137}
138
139void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) {
140 Convert(ctx, inst, value, "F16", "S16", false);
141}
142
143void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
144 Convert(ctx, inst, value, "F16", "S32", false);
145}
146
147void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) {
148 Convert(ctx, inst, value, "F16", "S64", false);
149}
150
151void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) {
152 Convert(ctx, inst, value, "F16", "U8", false);
153}
154
155void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) {
156 Convert(ctx, inst, value, "F16", "U16", false);
157}
158
159void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
160 Convert(ctx, inst, value, "F16", "U32", false);
161}
162
163void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) {
164 Convert(ctx, inst, value, "F16", "U64", false);
165}
166
167void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) {
168 Convert(ctx, inst, value, "F32", "S8", false);
169}
170
171void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) {
172 Convert(ctx, inst, value, "F32", "S16", false);
173}
174
175void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
176 Convert(ctx, inst, value, "F32", "S32", false);
177}
178
179void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) {
180 Convert(ctx, inst, value, "F32", "S64", false);
181}
182
183void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) {
184 Convert(ctx, inst, value, "F32", "U8", false);
185}
186
187void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) {
188 Convert(ctx, inst, value, "F32", "U16", false);
189}
190
191void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
192 Convert(ctx, inst, value, "F32", "U32", false);
193}
194
195void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
196 Convert(ctx, inst, value, "F32", "U64", false);
197}
198
199void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) {
200 Convert(ctx, inst, value, "F64", "S8", true);
201}
202
203void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) {
204 Convert(ctx, inst, value, "F64", "S16", true);
205}
206
207void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
208 Convert(ctx, inst, value, "F64", "S32", true);
209}
210
211void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) {
212 Convert(ctx, inst, value, "F64", "S64", true);
213}
214
215void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) {
216 Convert(ctx, inst, value, "F64", "U8", true);
217}
218
219void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) {
220 Convert(ctx, inst, value, "F64", "U16", true);
221}
222
223void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
224 Convert(ctx, inst, value, "F64", "U32", true);
225}
226
227void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) {
228 Convert(ctx, inst, value, "F64", "U64", true);
229}
230
231} // namespace Shader::Backend::GLASM
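Note on the conversion emitter above: Convert() folds the destination type, the source type, and the IR's floating-point rounding mode into a single CVT instruction, using LongDefine() for 64-bit results. A minimal sketch of the string it builds, assuming hypothetical register names R0/R1 that are not taken from the file:

#include <fmt/format.h>
#include <string>
#include <string_view>

// Rebuilds the exact format string used by Convert():
//   "CVT.{}.{}{} {}.x,{};"
std::string MakeCvt(std::string_view dest, std::string_view src, std::string_view rounding,
                    std::string_view ret, std::string_view value) {
    return fmt::format("CVT.{}.{}{} {}.x,{};", dest, src, rounding, ret, value);
}

int main() {
    // EmitConvertS32F32 under IR::FpRounding::RZ would produce:
    //   CVT.S32.F32.TRUNC R0.x,R1.x;
    fmt::print("{}\n", MakeCvt("S32", "F32", ".TRUNC", "R0", "R1.x"));
}

Here ".TRUNC" comes from the FpRounding() switch near the top of the file; the other visible modes map to ".FLR" and ".CEIL".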
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
new file mode 100644
index 000000000..4ed58619d
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
@@ -0,0 +1,414 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLASM {
13namespace {
14template <typename InputType>
15void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op,
16 std::string_view type, bool ordered, bool inequality = false) {
17 const Register ret{ctx.reg_alloc.Define(inst)};
18 ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs);
19 if (ordered && inequality) {
20 ctx.Add("SEQ.{} RC.y,{},{};"
21 "SEQ.{} RC.z,{},{};"
22 "AND.U RC.x,RC.x,RC.y;"
23 "AND.U RC.x,RC.x,RC.z;"
24 "SNE.S {}.x,RC.x,0;",
25 type, lhs, lhs, type, rhs, rhs, ret);
26 } else if (ordered) {
27 ctx.Add("SNE.S {}.x,RC.x,0;", ret);
28 } else {
29 ctx.Add("SNE.{} RC.y,{},{};"
30 "SNE.{} RC.z,{},{};"
31 "OR.U RC.x,RC.x,RC.y;"
32 "OR.U RC.x,RC.x,RC.z;"
33 "SNE.S {}.x,RC.x,0;",
34 type, lhs, lhs, type, rhs, rhs, ret);
35 }
36}
37
38template <typename InputType>
39void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value,
40 InputType max_value, std::string_view type) {
41 // Call MAX first so that a NaN input is clamped to min_value instead of max_value
42 ctx.Add("MAX.{} RC.x,{},{};"
43 "MIN.{} {}.x,RC.x,{};",
44 type, min_value, value, type, ret, max_value);
45}
46
47std::string_view Precise(IR::Inst& inst) {
48 const bool precise{inst.Flags<IR::FpControl>().no_contraction};
49 return precise ? ".PREC" : "";
50}
51} // Anonymous namespace
52
53void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
54 [[maybe_unused]] Register value) {
55 throw NotImplementedException("GLASM instruction");
56}
57
58void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
59 ctx.Add("MOV.F {}.x,|{}|;", inst, value);
60}
61
62void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
63 ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value);
64}
65
66void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
67 [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
68 throw NotImplementedException("GLASM instruction");
69}
70
71void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
72 ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
73}
74
75void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
76 ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
77}
78
79void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
80 [[maybe_unused]] Register a, [[maybe_unused]] Register b,
81 [[maybe_unused]] Register c) {
82 throw NotImplementedException("GLASM instruction");
83}
84
85void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) {
86 ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c);
87}
88
89void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) {
90 ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c);
91}
92
93void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
94 ctx.Add("MAX.F {}.x,{},{};", inst, a, b);
95}
96
97void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
98 ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b);
99}
100
101void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
102 ctx.Add("MIN.F {}.x,{},{};", inst, a, b);
103}
104
105void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
106 ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b);
107}
108
109void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
110 [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
111 throw NotImplementedException("GLASM instruction");
112}
113
114void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
115 ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
116}
117
118void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
119 ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
120}
121
122void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
123 throw NotImplementedException("GLASM instruction");
124}
125
126void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) {
127 ctx.Add("MOV.F {}.x,-{};", inst, value);
128}
129
130void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
131 ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value);
132}
133
134void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
135 ctx.Add("SIN {}.x,{};", inst, value);
136}
137
138void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
139 ctx.Add("COS {}.x,{};", inst, value);
140}
141
142void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
143 ctx.Add("EX2 {}.x,{};", inst, value);
144}
145
146void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
147 ctx.Add("LG2 {}.x,{};", inst, value);
148}
149
150void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
151 ctx.Add("RCP {}.x,{};", inst, value);
152}
153
154void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
155 throw NotImplementedException("GLASM instruction");
156}
157
158void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
159 ctx.Add("RSQ {}.x,{};", inst, value);
160}
161
162void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
163 throw NotImplementedException("GLASM instruction");
164}
165
166void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
167 const Register ret{ctx.reg_alloc.Define(inst)};
168 ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret);
169}
170
171void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
172 throw NotImplementedException("GLASM instruction");
173}
174
175void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
176 ctx.Add("MOV.F.SAT {}.x,{};", inst, value);
177}
178
179void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
180 throw NotImplementedException("GLASM instruction");
181}
182
183void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value,
184 [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) {
185 throw NotImplementedException("GLASM instruction");
186}
187
188void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
189 ScalarF32 max_value) {
190 Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F");
191}
192
193void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
194 ScalarF64 max_value) {
195 Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64");
196}
197
198void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
199 throw NotImplementedException("GLASM instruction");
200}
201
202void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
203 ctx.Add("ROUND.F {}.x,{};", inst, value);
204}
205
206void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
207 ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value);
208}
209
210void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
211 throw NotImplementedException("GLASM instruction");
212}
213
214void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
215 ctx.Add("FLR.F {}.x,{};", inst, value);
216}
217
218void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
219 ctx.LongAdd("FLR.F64 {}.x,{};", inst, value);
220}
221
222void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
223 throw NotImplementedException("GLASM instruction");
224}
225
226void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
227 ctx.Add("CEIL.F {}.x,{};", inst, value);
228}
229
230void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
231 ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value);
232}
233
234void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
235 throw NotImplementedException("GLASM instruction");
236}
237
238void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
239 ctx.Add("TRUNC.F {}.x,{};", inst, value);
240}
241
242void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
243 ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value);
244}
245
246void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
247 [[maybe_unused]] Register rhs) {
248 throw NotImplementedException("GLASM instruction");
249}
250
251void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
252 Compare(ctx, inst, lhs, rhs, "SEQ", "F", true);
253}
254
255void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
256 Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true);
257}
258
259void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
260 [[maybe_unused]] Register rhs) {
261 throw NotImplementedException("GLASM instruction");
262}
263
264void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
265 Compare(ctx, inst, lhs, rhs, "SEQ", "F", false);
266}
267
268void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
269 Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false);
270}
271
272void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
273 [[maybe_unused]] Register rhs) {
274 throw NotImplementedException("GLASM instruction");
275}
276
277void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
278 Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true);
279}
280
281void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
282 Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true);
283}
284
285void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
286 [[maybe_unused]] Register rhs) {
287 throw NotImplementedException("GLASM instruction");
288}
289
290void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
291 Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true);
292}
293
294void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
295 Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true);
296}
297
298void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
299 [[maybe_unused]] Register rhs) {
300 throw NotImplementedException("GLASM instruction");
301}
302
303void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
304 Compare(ctx, inst, lhs, rhs, "SLT", "F", true);
305}
306
307void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
308 Compare(ctx, inst, lhs, rhs, "SLT", "F64", true);
309}
310
311void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
312 [[maybe_unused]] Register rhs) {
313 throw NotImplementedException("GLASM instruction");
314}
315
316void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
317 Compare(ctx, inst, lhs, rhs, "SLT", "F", false);
318}
319
320void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
321 Compare(ctx, inst, lhs, rhs, "SLT", "F64", false);
322}
323
324void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
325 [[maybe_unused]] Register rhs) {
326 throw NotImplementedException("GLASM instruction");
327}
328
329void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
330 Compare(ctx, inst, lhs, rhs, "SGT", "F", true);
331}
332
333void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
334 Compare(ctx, inst, lhs, rhs, "SGT", "F64", true);
335}
336
337void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
338 [[maybe_unused]] Register rhs) {
339 throw NotImplementedException("GLASM instruction");
340}
341
342void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
343 Compare(ctx, inst, lhs, rhs, "SGT", "F", false);
344}
345
346void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
347 Compare(ctx, inst, lhs, rhs, "SGT", "F64", false);
348}
349
350void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
351 [[maybe_unused]] Register rhs) {
352 throw NotImplementedException("GLASM instruction");
353}
354
355void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
356 Compare(ctx, inst, lhs, rhs, "SLE", "F", true);
357}
358
359void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
360 Compare(ctx, inst, lhs, rhs, "SLE", "F64", true);
361}
362
363void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
364 [[maybe_unused]] Register rhs) {
365 throw NotImplementedException("GLASM instruction");
366}
367
368void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
369 Compare(ctx, inst, lhs, rhs, "SLE", "F", false);
370}
371
372void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
373 Compare(ctx, inst, lhs, rhs, "SLE", "F64", false);
374}
375
376void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
377 [[maybe_unused]] Register rhs) {
378 throw NotImplementedException("GLASM instruction");
379}
380
381void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
382 Compare(ctx, inst, lhs, rhs, "SGE", "F", true);
383}
384
385void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
386 Compare(ctx, inst, lhs, rhs, "SGE", "F64", true);
387}
388
389void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
390 [[maybe_unused]] Register rhs) {
391 throw NotImplementedException("GLASM instruction");
392}
393
394void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
395 Compare(ctx, inst, lhs, rhs, "SGE", "F", false);
396}
397
398void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
399 Compare(ctx, inst, lhs, rhs, "SGE", "F64", false);
400}
401
402void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
403 throw NotImplementedException("GLASM instruction");
404}
405
406void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
407 Compare(ctx, inst, value, value, "SNE", "F", true, false);
408}
409
410void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
411 Compare(ctx, inst, value, value, "SNE", "F64", true, false);
412}
413
414} // namespace Shader::Backend::GLASM
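Note on the Compare() helper at the top of this file: the ordered inequality path additionally requires lhs == lhs and rhs == rhs (i.e. neither operand is NaN), while the unordered paths OR in lhs != lhs and rhs != rhs, so any NaN makes the result true. A scalar C++ sketch of the intended predicates, for illustration only; OrdNotEqual and UnordEqual are hypothetical names, not backend functions:

#include <cmath>

// Ordered not-equal: SNE combined with two SEQ self-comparisons and ANDs,
// so the result is false whenever either operand is NaN.
bool OrdNotEqual(float lhs, float rhs) {
    return !std::isnan(lhs) && !std::isnan(rhs) && lhs != rhs;
}

// Unordered equal: SEQ combined with two SNE self-comparisons and ORs,
// so the result is true whenever either operand is NaN.
bool UnordEqual(float lhs, float rhs) {
    return std::isnan(lhs) || std::isnan(rhs) || lhs == rhs;
}

EmitFPIsNan32/64 reuse the same helper with the value compared against itself ("SNE", ordered), relying on NaN != NaN to yield true exactly for NaN inputs.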
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
new file mode 100644
index 000000000..09e3a9b82
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -0,0 +1,850 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLASM {
13namespace {
14struct ScopedRegister {
15 ScopedRegister() = default;
16 ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
17
18 ~ScopedRegister() {
19 if (reg_alloc) {
20 reg_alloc->FreeReg(reg);
21 }
22 }
23
24 ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
25 if (reg_alloc) {
26 reg_alloc->FreeReg(reg);
27 }
28 reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
29 reg = rhs.reg;
30 return *this;
31 }
32
33 ScopedRegister(ScopedRegister&& rhs) noexcept
34 : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
35
36 ScopedRegister& operator=(const ScopedRegister&) = delete;
37 ScopedRegister(const ScopedRegister&) = delete;
38
39 RegAlloc* reg_alloc{};
40 Register reg;
41};
42
43std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
44 [[maybe_unused]] const IR::Value& index) {
45 // FIXME: indexed reads
46 if (info.type == TextureType::Buffer) {
47 return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
48 } else {
49 return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
50 }
51}
52
53std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
54 [[maybe_unused]] const IR::Value& index) {
55 // FIXME: indexed reads
56 if (info.type == TextureType::Buffer) {
57 return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
58 } else {
59 return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
60 }
61}
62
63std::string_view TextureType(IR::TextureInstInfo info) {
64 if (info.is_depth) {
65 switch (info.type) {
66 case TextureType::Color1D:
67 return "SHADOW1D";
68 case TextureType::ColorArray1D:
69 return "SHADOWARRAY1D";
70 case TextureType::Color2D:
71 return "SHADOW2D";
72 case TextureType::ColorArray2D:
73 return "SHADOWARRAY2D";
74 case TextureType::Color3D:
75 return "SHADOW3D";
76 case TextureType::ColorCube:
77 return "SHADOWCUBE";
78 case TextureType::ColorArrayCube:
79 return "SHADOWARRAYCUBE";
80 case TextureType::Buffer:
81 return "SHADOWBUFFER";
82 }
83 } else {
84 switch (info.type) {
85 case TextureType::Color1D:
86 return "1D";
87 case TextureType::ColorArray1D:
88 return "ARRAY1D";
89 case TextureType::Color2D:
90 return "2D";
91 case TextureType::ColorArray2D:
92 return "ARRAY2D";
93 case TextureType::Color3D:
94 return "3D";
95 case TextureType::ColorCube:
96 return "CUBE";
97 case TextureType::ColorArrayCube:
98 return "ARRAYCUBE";
99 case TextureType::Buffer:
100 return "BUFFER";
101 }
102 }
103 throw InvalidArgument("Invalid texture type {}", info.type.Value());
104}
105
106std::string Offset(EmitContext& ctx, const IR::Value& offset) {
107 if (offset.IsEmpty()) {
108 return "";
109 }
110 return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
111}
112
113std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
114 const IR::Value& offset2) {
115 if (offset2.IsEmpty()) {
116 return {};
117 } else {
118 return {ctx.reg_alloc, ctx.reg_alloc};
119 }
120}
121
122void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
123 const IR::Value& offset2) {
124 const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
125 const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
126 // Input swizzle: [XYXY] [XYXY]
127 // Output swizzle: [XXXX] [YYYY]
128 ctx.Add("MOV {}.x,{}.x;"
129 "MOV {}.y,{}.z;"
130 "MOV {}.z,{}.x;"
131 "MOV {}.w,{}.z;"
132 "MOV {}.x,{}.y;"
133 "MOV {}.y,{}.w;"
134 "MOV {}.z,{}.y;"
135 "MOV {}.w,{}.w;",
136 off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
137 offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
138}
139
140std::string GradOffset(const IR::Value& offset) {
141 if (offset.IsImmediate()) {
142 LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
143 return "";
144 }
145 IR::Inst* const vector{offset.InstRecursive()};
146 if (!vector->AreAllArgsImmediates()) {
147 LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
148 return "";
149 }
150 switch (vector->NumArgs()) {
151 case 1:
152 return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
153 case 2:
154 return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
155 static_cast<s32>(vector->Arg(1).U32()));
156 default:
157 throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
158 }
159}
160
161std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
162 if (coord.IsImmediate()) {
163 ScopedRegister scoped_reg(ctx.reg_alloc);
164 ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
165 return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
166 }
167 std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
168 if (coord.InstRecursive()->HasUses()) {
169 // Move non-dead coords to a separate register, although this should never happen because
170 // vectors are only assembled for immediate texture instructions
171 ctx.Add("MOV.F RC,{};", coord_vec);
172 coord_vec = "RC";
173 }
174 return {std::move(coord_vec), ScopedRegister{}};
175}
176
177void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
178 if (!sparse_inst) {
179 return;
180 }
181 const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
182 ctx.Add("MOV.S {},-1;"
183 "MOV.S {}(NONRESIDENT),0;",
184 sparse_ret, sparse_ret);
185}
186
187std::string_view FormatStorage(ImageFormat format) {
188 switch (format) {
189 case ImageFormat::Typeless:
190 return "U";
191 case ImageFormat::R8_UINT:
192 return "U8";
193 case ImageFormat::R8_SINT:
194 return "S8";
195 case ImageFormat::R16_UINT:
196 return "U16";
197 case ImageFormat::R16_SINT:
198 return "S16";
199 case ImageFormat::R32_UINT:
200 return "U32";
201 case ImageFormat::R32G32_UINT:
202 return "U32X2";
203 case ImageFormat::R32G32B32A32_UINT:
204 return "U32X4";
205 }
206 throw InvalidArgument("Invalid image format {}", format);
207}
208
209template <typename T>
210void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
211 std::string_view op) {
212 const auto info{inst.Flags<IR::TextureInstInfo>()};
213 const std::string_view type{TextureType(info)};
214 const std::string image{Image(ctx, info, index)};
215 const Register ret{ctx.reg_alloc.Define(inst)};
216 ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
217}
218
219IR::Inst* PrepareSparse(IR::Inst& inst) {
220 const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
221 if (sparse_inst) {
222 sparse_inst->Invalidate();
223 }
224 return sparse_inst;
225}
226} // Anonymous namespace
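// Note on the sparse-residency helpers above: PrepareSparse() invalidates the
// GetSparseFromOp pseudo-op so it is not lowered separately, the emitters below
// append ".SPARSE" to the sampling opcode whenever that pseudo-op exists, and
// StoreSparse() writes -1 (resident) into the pseudo-op's register, then
// overwrites it with 0 under the NONRESIDENT condition.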
227
228void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
229 const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
230 const auto info{inst.Flags<IR::TextureInstInfo>()};
231 const auto sparse_inst{PrepareSparse(inst)};
232 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
233 const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
234 const std::string_view type{TextureType(info)};
235 const std::string texture{Texture(ctx, info, index)};
236 const std::string offset_vec{Offset(ctx, offset)};
237 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
238 const Register ret{ctx.reg_alloc.Define(inst)};
239 if (info.has_bias) {
240 if (info.type == TextureType::ColorArrayCube) {
241 ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
242 bias_lc, texture, offset_vec);
243 } else {
244 if (info.has_lod_clamp) {
245 ctx.Add("MOV.F {}.w,{}.x;"
246 "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
247 coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
248 offset_vec);
249 } else {
250 ctx.Add("MOV.F {}.w,{}.x;"
251 "TXB.F{} {},{},{},{}{};",
252 coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
253 }
254 }
255 } else {
256 if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
257 ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
258 bias_lc, texture, offset_vec);
259 } else {
260 ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
261 type, offset_vec);
262 }
263 }
264 StoreSparse(ctx, sparse_inst);
265}
266
267void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
268 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
269 const auto info{inst.Flags<IR::TextureInstInfo>()};
270 const auto sparse_inst{PrepareSparse(inst)};
271 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
272 const std::string_view type{TextureType(info)};
273 const std::string texture{Texture(ctx, info, index)};
274 const std::string offset_vec{Offset(ctx, offset)};
275 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
276 const Register ret{ctx.reg_alloc.Define(inst)};
277 if (info.type == TextureType::ColorArrayCube) {
278 ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
279 offset_vec);
280 } else {
281 ctx.Add("MOV.F {}.w,{};"
282 "TXL.F{} {},{},{},{}{};",
283 coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
284 }
285 StoreSparse(ctx, sparse_inst);
286}
287
288void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
289 const IR::Value& coord, const IR::Value& dref,
290 const IR::Value& bias_lc, const IR::Value& offset) {
291 // Allocate early to avoid aliases
292 const auto info{inst.Flags<IR::TextureInstInfo>()};
293 ScopedRegister staging;
294 if (info.type == TextureType::ColorArrayCube) {
295 staging = ScopedRegister{ctx.reg_alloc};
296 }
297 const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
298 const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
299 const auto sparse_inst{PrepareSparse(inst)};
300 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
301 const std::string_view type{TextureType(info)};
302 const std::string texture{Texture(ctx, info, index)};
303 const std::string offset_vec{Offset(ctx, offset)};
304 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
305 const Register ret{ctx.reg_alloc.Define(inst)};
306 if (info.has_bias) {
307 if (info.has_lod_clamp) {
308 switch (info.type) {
309 case TextureType::Color1D:
310 case TextureType::ColorArray1D:
311 case TextureType::Color2D:
312 ctx.Add("MOV.F {}.z,{};"
313 "MOV.F {}.w,{}.x;"
314 "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
315 coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
316 bias_lc_vec, texture, type, offset_vec);
317 break;
318 case TextureType::ColorArray2D:
319 case TextureType::ColorCube:
320 ctx.Add("MOV.F {}.w,{};"
321 "TXB.F.LODCLAMP{} {},{},{},{},{}{};",
322 coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
323 offset_vec);
324 break;
325 default:
326 throw NotImplementedException("Invalid type {} with bias and lod clamp",
327 info.type.Value());
328 }
329 } else {
330 switch (info.type) {
331 case TextureType::Color1D:
332 case TextureType::ColorArray1D:
333 case TextureType::Color2D:
334 ctx.Add("MOV.F {}.z,{};"
335 "MOV.F {}.w,{}.x;"
336 "TXB.F{} {},{},{},{}{};",
337 coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
338 texture, type, offset_vec);
339 break;
340 case TextureType::ColorArray2D:
341 case TextureType::ColorCube:
342 ctx.Add("MOV.F {}.w,{};"
343 "TXB.F{} {},{},{},{},{}{};",
344 coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
345 offset_vec);
346 break;
347 case TextureType::ColorArrayCube:
348 ctx.Add("MOV.F {}.x,{};"
349 "MOV.F {}.y,{}.x;"
350 "TXB.F{} {},{},{},{},{}{};",
351 staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
352 staging.reg, texture, type, offset_vec);
353 break;
354 default:
355 throw NotImplementedException("Invalid type {}", info.type.Value());
356 }
357 }
358 } else {
359 if (info.has_lod_clamp) {
360 if (info.type != TextureType::ColorArrayCube) {
361 const bool w_swizzle{info.type == TextureType::ColorArray2D ||
362 info.type == TextureType::ColorCube};
363 const char dref_swizzle{w_swizzle ? 'w' : 'z'};
364 ctx.Add("MOV.F {}.{},{};"
365 "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
366 coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
367 texture, type, offset_vec);
368 } else {
369 ctx.Add("MOV.F {}.x,{};"
370 "MOV.F {}.y,{};"
371 "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
372 staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
373 staging.reg, texture, type, offset_vec);
374 }
375 } else {
376 if (info.type != TextureType::ColorArrayCube) {
377 const bool w_swizzle{info.type == TextureType::ColorArray2D ||
378 info.type == TextureType::ColorCube};
379 const char dref_swizzle{w_swizzle ? 'w' : 'z'};
380 ctx.Add("MOV.F {}.{},{};"
381 "TEX.F{} {},{},{},{}{};",
382 coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
383 type, offset_vec);
384 } else {
385 ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
386 type, offset_vec);
387 }
388 }
389 }
390 StoreSparse(ctx, sparse_inst);
391}
392
393void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
394 const IR::Value& coord, const IR::Value& dref,
395 const IR::Value& lod, const IR::Value& offset) {
396 // Allocate early to avoid aliases
397 const auto info{inst.Flags<IR::TextureInstInfo>()};
398 ScopedRegister staging;
399 if (info.type == TextureType::ColorArrayCube) {
400 staging = ScopedRegister{ctx.reg_alloc};
401 }
402 const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
403 const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
404 const auto sparse_inst{PrepareSparse(inst)};
405 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
406 const std::string_view type{TextureType(info)};
407 const std::string texture{Texture(ctx, info, index)};
408 const std::string offset_vec{Offset(ctx, offset)};
409 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
410 const Register ret{ctx.reg_alloc.Define(inst)};
411 switch (info.type) {
412 case TextureType::Color1D:
413 case TextureType::ColorArray1D:
414 case TextureType::Color2D:
415 ctx.Add("MOV.F {}.z,{};"
416 "MOV.F {}.w,{};"
417 "TXL.F{} {},{},{},{}{};",
418 coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
419 offset_vec);
420 break;
421 case TextureType::ColorArray2D:
422 case TextureType::ColorCube:
423 ctx.Add("MOV.F {}.w,{};"
424 "TXL.F{} {},{},{},{},{}{};",
425 coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
426 offset_vec);
427 break;
428 case TextureType::ColorArrayCube:
429 ctx.Add("MOV.F {}.x,{};"
430 "MOV.F {}.y,{};"
431 "TXL.F{} {},{},{},{},{}{};",
432 staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
433 staging.reg, texture, type, offset_vec);
434 break;
435 default:
436 throw NotImplementedException("Invalid type {}", info.type.Value());
437 }
438 StoreSparse(ctx, sparse_inst);
439}
440
441void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
442 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
443 // Allocate offsets early so they don't overwrite any consumed register
444 const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
445 const auto info{inst.Flags<IR::TextureInstInfo>()};
446 const char comp{"xyzw"[info.gather_component]};
447 const auto sparse_inst{PrepareSparse(inst)};
448 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
449 const std::string_view type{TextureType(info)};
450 const std::string texture{Texture(ctx, info, index)};
451 const Register coord_vec{ctx.reg_alloc.Consume(coord)};
452 const Register ret{ctx.reg_alloc.Define(inst)};
453 if (offset2.IsEmpty()) {
454 const std::string offset_vec{Offset(ctx, offset)};
455 ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
456 offset_vec);
457 } else {
458 SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
459 ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
460 texture, comp, type);
461 }
462 StoreSparse(ctx, sparse_inst);
463}
464
465void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
466 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
467 const IR::Value& dref) {
468 // FIXME: This instruction is not working as expected
469
470 // Allocate offsets early so they don't overwrite any consumed register
471 const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
472 const auto info{inst.Flags<IR::TextureInstInfo>()};
473 const auto sparse_inst{PrepareSparse(inst)};
474 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
475 const std::string_view type{TextureType(info)};
476 const std::string texture{Texture(ctx, info, index)};
477 const Register coord_vec{ctx.reg_alloc.Consume(coord)};
478 const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
479 const Register ret{ctx.reg_alloc.Define(inst)};
480 std::string args;
481 switch (info.type) {
482 case TextureType::Color2D:
483 ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
484 args = fmt::to_string(coord_vec);
485 break;
486 case TextureType::ColorArray2D:
487 case TextureType::ColorCube:
488 ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
489 args = fmt::to_string(coord_vec);
490 break;
491 case TextureType::ColorArrayCube:
492 args = fmt::format("{},{}", coord_vec, dref_value);
493 break;
494 default:
495 throw NotImplementedException("Invalid type {}", info.type.Value());
496 }
497 if (offset2.IsEmpty()) {
498 const std::string offset_vec{Offset(ctx, offset)};
499 ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
500 } else {
501 SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
502 ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
503 type);
504 }
505 StoreSparse(ctx, sparse_inst);
506}
507
508void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
509 const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
510 const auto info{inst.Flags<IR::TextureInstInfo>()};
511 const auto sparse_inst{PrepareSparse(inst)};
512 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
513 const std::string_view type{TextureType(info)};
514 const std::string texture{Texture(ctx, info, index)};
515 const std::string offset_vec{Offset(ctx, offset)};
516 const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
517 const Register ret{ctx.reg_alloc.Define(inst)};
518 if (info.type == TextureType::Buffer) {
519 ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
520 } else if (ms.type != Type::Void) {
521 ctx.Add("MOV.S {}.w,{};"
522 "TXFMS.F{} {},{},{},{}{};",
523 coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
524 } else {
525 ctx.Add("MOV.S {}.w,{};"
526 "TXF.F{} {},{},{},{}{};",
527 coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
528 }
529 StoreSparse(ctx, sparse_inst);
530}
531
532void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
533 ScalarS32 lod) {
534 const auto info{inst.Flags<IR::TextureInstInfo>()};
535 const std::string texture{Texture(ctx, info, index)};
536 const std::string_view type{TextureType(info)};
537 ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
538}
539
540void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
541 const auto info{inst.Flags<IR::TextureInstInfo>()};
542 const std::string texture{Texture(ctx, info, index)};
543 const std::string_view type{TextureType(info)};
544 ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
545}
546
547void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
548 const IR::Value& coord, const IR::Value& derivatives,
549 const IR::Value& offset, const IR::Value& lod_clamp) {
550 const auto info{inst.Flags<IR::TextureInstInfo>()};
551 ScopedRegister dpdx, dpdy;
552 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
553 if (multi_component) {
554 // Allocate this early to avoid aliasing other registers
555 dpdx = ScopedRegister{ctx.reg_alloc};
556 dpdy = ScopedRegister{ctx.reg_alloc};
557 }
558 const auto sparse_inst{PrepareSparse(inst)};
559 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
560 const std::string_view type{TextureType(info)};
561 const std::string texture{Texture(ctx, info, index)};
562 const std::string offset_vec{GradOffset(offset)};
563 const Register coord_vec{ctx.reg_alloc.Consume(coord)};
564 const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
565 const Register ret{ctx.reg_alloc.Define(inst)};
566 if (multi_component) {
567 ctx.Add("MOV.F {}.x,{}.x;"
568 "MOV.F {}.y,{}.z;"
569 "MOV.F {}.x,{}.y;"
570 "MOV.F {}.y,{}.w;",
571 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
572 dpdy.reg, derivatives_vec);
573 if (info.has_lod_clamp) {
574 const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
575 ctx.Add("MOV.F {}.w,{};"
576 "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
577 dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
578 texture, type, offset_vec);
579 } else {
580 ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
581 texture, type, offset_vec);
582 }
583 } else {
584 ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
585 derivatives_vec, texture, type, offset_vec);
586 }
587 StoreSparse(ctx, sparse_inst);
588}
589
590void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
591 const auto info{inst.Flags<IR::TextureInstInfo>()};
592 const auto sparse_inst{PrepareSparse(inst)};
593 const std::string_view format{FormatStorage(info.image_format)};
594 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
595 const std::string_view type{TextureType(info)};
596 const std::string image{Image(ctx, info, index)};
597 const Register ret{ctx.reg_alloc.Define(inst)};
598 ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
599 StoreSparse(ctx, sparse_inst);
600}
601
602void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
603 Register color) {
604 const auto info{inst.Flags<IR::TextureInstInfo>()};
605 const std::string_view format{FormatStorage(info.image_format)};
606 const std::string_view type{TextureType(info)};
607 const std::string image{Image(ctx, info, index)};
608 ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
609}
610
611void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
612 ScalarU32 value) {
613 ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
614}
615
616void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
617 ScalarS32 value) {
618 ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
619}
620
621void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
622 ScalarU32 value) {
623 ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
624}
625
626void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
627 ScalarS32 value) {
628 ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
629}
630
631void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
632 ScalarU32 value) {
633 ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
634}
635
636void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
637 ScalarU32 value) {
638 ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
639}
640
641void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
642 ScalarU32 value) {
643 ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
644}
645
646void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
647 ScalarU32 value) {
648 ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
649}
650
651void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
652 ScalarU32 value) {
653 ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
654}
655
656void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
657 ScalarU32 value) {
658 ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
659}
660
661void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
662 Register coord, ScalarU32 value) {
663 ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
664}
665
666void EmitBindlessImageSampleImplicitLod(EmitContext&) {
667 throw LogicError("Unreachable instruction");
668}
669
670void EmitBindlessImageSampleExplicitLod(EmitContext&) {
671 throw LogicError("Unreachable instruction");
672}
673
674void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
675 throw LogicError("Unreachable instruction");
676}
677
678void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
679 throw LogicError("Unreachable instruction");
680}
681
682void EmitBindlessImageGather(EmitContext&) {
683 throw LogicError("Unreachable instruction");
684}
685
686void EmitBindlessImageGatherDref(EmitContext&) {
687 throw LogicError("Unreachable instruction");
688}
689
690void EmitBindlessImageFetch(EmitContext&) {
691 throw LogicError("Unreachable instruction");
692}
693
694void EmitBindlessImageQueryDimensions(EmitContext&) {
695 throw LogicError("Unreachable instruction");
696}
697
698void EmitBindlessImageQueryLod(EmitContext&) {
699 throw LogicError("Unreachable instruction");
700}
701
702void EmitBindlessImageGradient(EmitContext&) {
703 throw LogicError("Unreachable instruction");
704}
705
706void EmitBindlessImageRead(EmitContext&) {
707 throw LogicError("Unreachable instruction");
708}
709
710void EmitBindlessImageWrite(EmitContext&) {
711 throw LogicError("Unreachable instruction");
712}
713
714void EmitBoundImageSampleImplicitLod(EmitContext&) {
715 throw LogicError("Unreachable instruction");
716}
717
718void EmitBoundImageSampleExplicitLod(EmitContext&) {
719 throw LogicError("Unreachable instruction");
720}
721
722void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
723 throw LogicError("Unreachable instruction");
724}
725
726void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
727 throw LogicError("Unreachable instruction");
728}
729
730void EmitBoundImageGather(EmitContext&) {
731 throw LogicError("Unreachable instruction");
732}
733
734void EmitBoundImageGatherDref(EmitContext&) {
735 throw LogicError("Unreachable instruction");
736}
737
738void EmitBoundImageFetch(EmitContext&) {
739 throw LogicError("Unreachable instruction");
740}
741
742void EmitBoundImageQueryDimensions(EmitContext&) {
743 throw LogicError("Unreachable instruction");
744}
745
746void EmitBoundImageQueryLod(EmitContext&) {
747 throw LogicError("Unreachable instruction");
748}
749
750void EmitBoundImageGradient(EmitContext&) {
751 throw LogicError("Unreachable instruction");
752}
753
754void EmitBoundImageRead(EmitContext&) {
755 throw LogicError("Unreachable instruction");
756}
757
758void EmitBoundImageWrite(EmitContext&) {
759 throw LogicError("Unreachable instruction");
760}
761
762void EmitBindlessImageAtomicIAdd32(EmitContext&) {
763 throw LogicError("Unreachable instruction");
764}
765
766void EmitBindlessImageAtomicSMin32(EmitContext&) {
767 throw LogicError("Unreachable instruction");
768}
769
770void EmitBindlessImageAtomicUMin32(EmitContext&) {
771 throw LogicError("Unreachable instruction");
772}
773
774void EmitBindlessImageAtomicSMax32(EmitContext&) {
775 throw LogicError("Unreachable instruction");
776}
777
778void EmitBindlessImageAtomicUMax32(EmitContext&) {
779 throw LogicError("Unreachable instruction");
780}
781
782void EmitBindlessImageAtomicInc32(EmitContext&) {
783 throw LogicError("Unreachable instruction");
784}
785
786void EmitBindlessImageAtomicDec32(EmitContext&) {
787 throw LogicError("Unreachable instruction");
788}
789
790void EmitBindlessImageAtomicAnd32(EmitContext&) {
791 throw LogicError("Unreachable instruction");
792}
793
794void EmitBindlessImageAtomicOr32(EmitContext&) {
795 throw LogicError("Unreachable instruction");
796}
797
798void EmitBindlessImageAtomicXor32(EmitContext&) {
799 throw LogicError("Unreachable instruction");
800}
801
802void EmitBindlessImageAtomicExchange32(EmitContext&) {
803 throw LogicError("Unreachable instruction");
804}
805
806void EmitBoundImageAtomicIAdd32(EmitContext&) {
807 throw LogicError("Unreachable instruction");
808}
809
810void EmitBoundImageAtomicSMin32(EmitContext&) {
811 throw LogicError("Unreachable instruction");
812}
813
814void EmitBoundImageAtomicUMin32(EmitContext&) {
815 throw LogicError("Unreachable instruction");
816}
817
818void EmitBoundImageAtomicSMax32(EmitContext&) {
819 throw LogicError("Unreachable instruction");
820}
821
822void EmitBoundImageAtomicUMax32(EmitContext&) {
823 throw LogicError("Unreachable instruction");
824}
825
826void EmitBoundImageAtomicInc32(EmitContext&) {
827 throw LogicError("Unreachable instruction");
828}
829
830void EmitBoundImageAtomicDec32(EmitContext&) {
831 throw LogicError("Unreachable instruction");
832}
833
834void EmitBoundImageAtomicAnd32(EmitContext&) {
835 throw LogicError("Unreachable instruction");
836}
837
838void EmitBoundImageAtomicOr32(EmitContext&) {
839 throw LogicError("Unreachable instruction");
840}
841
842void EmitBoundImageAtomicXor32(EmitContext&) {
843 throw LogicError("Unreachable instruction");
844}
845
846void EmitBoundImageAtomicExchange32(EmitContext&) {
847 throw LogicError("Unreachable instruction");
848}
849
850} // namespace Shader::Backend::GLASM
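Note on ScopedRegister, defined at the top of this file: it is a move-only RAII guard that takes a scratch register from RegAlloc on construction and returns it on destruction, which is why several emitters above allocate their staging and offset registers before consuming other operands. A minimal, self-contained sketch of the same idiom; FakeRegAlloc and ScopedReg are stand-ins invented here, not the backend's real types:

#include <utility>

// Stand-in allocator exposing the AllocReg()/FreeReg() shape used above.
struct FakeRegAlloc {
    int next_reg{};
    int AllocReg() { return next_reg++; }
    void FreeReg(int) {}
};

// Move-only guard mirroring ScopedRegister: the register is freed exactly once,
// by whichever object ends up owning it.
class ScopedReg {
public:
    ScopedReg() = default;
    explicit ScopedReg(FakeRegAlloc& alloc_) : alloc{&alloc_}, reg{alloc->AllocReg()} {}
    ScopedReg(ScopedReg&& rhs) noexcept
        : alloc{std::exchange(rhs.alloc, nullptr)}, reg{rhs.reg} {}
    ScopedReg& operator=(ScopedReg&& rhs) noexcept {
        Release();
        alloc = std::exchange(rhs.alloc, nullptr);
        reg = rhs.reg;
        return *this;
    }
    ScopedReg(const ScopedReg&) = delete;
    ScopedReg& operator=(const ScopedReg&) = delete;
    ~ScopedReg() { Release(); }

    int Value() const { return reg; }

private:
    void Release() {
        if (alloc) {
            alloc->FreeReg(reg);
        }
    }

    FakeRegAlloc* alloc{};
    int reg{};
};

int main() {
    FakeRegAlloc reg_alloc;
    ScopedReg staging;               // empty by default, like the emitters' declarations
    staging = ScopedReg{reg_alloc};  // allocate only on the paths that need it
    // staging.Value() is usable here; the register is freed when staging goes out of scope
}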
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
new file mode 100644
index 000000000..12afda43b
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -0,0 +1,625 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/backend/glasm/reg_alloc.h"
9
10namespace Shader::IR {
11enum class Attribute : u64;
12enum class Patch : u64;
13class Inst;
14class Value;
15} // namespace Shader::IR
16
17namespace Shader::Backend::GLASM {
18
19class EmitContext;
20
21// Microinstruction emitters
22void EmitPhi(EmitContext& ctx, IR::Inst& inst);
23void EmitVoid(EmitContext& ctx);
24void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
25void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
26void EmitReference(EmitContext&, const IR::Value& value);
27void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
28void EmitJoin(EmitContext& ctx);
29void EmitDemoteToHelperInvocation(EmitContext& ctx);
30void EmitBarrier(EmitContext& ctx);
31void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
32void EmitDeviceMemoryBarrier(EmitContext& ctx);
33void EmitPrologue(EmitContext& ctx);
34void EmitEpilogue(EmitContext& ctx);
35void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream);
36void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
37void EmitGetRegister(EmitContext& ctx);
38void EmitSetRegister(EmitContext& ctx);
39void EmitGetPred(EmitContext& ctx);
40void EmitSetPred(EmitContext& ctx);
41void EmitSetGotoVariable(EmitContext& ctx);
42void EmitGetGotoVariable(EmitContext& ctx);
43void EmitSetIndirectBranchVariable(EmitContext& ctx);
44void EmitGetIndirectBranchVariable(EmitContext& ctx);
45void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
46void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
47void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
48void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
49void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
50void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
51void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
52void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
53void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
54void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
55void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
56void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
57void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
58void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value);
59void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value);
60void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value);
61void EmitGetZFlag(EmitContext& ctx);
62void EmitGetSFlag(EmitContext& ctx);
63void EmitGetCFlag(EmitContext& ctx);
64void EmitGetOFlag(EmitContext& ctx);
65void EmitSetZFlag(EmitContext& ctx);
66void EmitSetSFlag(EmitContext& ctx);
67void EmitSetCFlag(EmitContext& ctx);
68void EmitSetOFlag(EmitContext& ctx);
69void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
70void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
71void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
72void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
73void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
74void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
75void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
76void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
77void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
78void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
79void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
80void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
81void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
82void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address);
83void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address);
84void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address);
85void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address);
86void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address);
87void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address);
88void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address);
89void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value);
90void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value);
91void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value);
92void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value);
93void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value);
94void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value);
95void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value);
96void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
97 ScalarU32 offset);
98void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
99 ScalarU32 offset);
100void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
101 ScalarU32 offset);
102void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
103 ScalarU32 offset);
104void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
105 ScalarU32 offset);
106void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
107 ScalarU32 offset);
108void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
109 ScalarU32 offset);
110void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
111 ScalarU32 value);
112void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
113 ScalarS32 value);
114void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
115 ScalarU32 value);
116void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
117 ScalarS32 value);
118void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
119 ScalarU32 value);
120void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
121 Register value);
122void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
123 Register value);
124void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
125void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
126void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
127void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
128void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
129void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
130void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
131void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
132void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
133void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
134void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value);
135void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value);
136void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
137 const IR::Value& e2);
138void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
139 const IR::Value& e2, const IR::Value& e3);
140void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
141 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
142void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
143void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
144void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
145void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
146void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
147void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
148void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2);
149void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3);
150void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3,
151 Register e4);
152void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index);
153void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index);
154void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
155void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
156void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
157void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
158void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
159 const IR::Value& e2);
160void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
161 const IR::Value& e2, const IR::Value& e3);
162void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
163 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
164void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
165void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
166void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
167void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
168 ScalarF32 object, u32 index);
169void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
170 ScalarF32 object, u32 index);
171void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
172 ScalarF32 object, u32 index);
173void EmitCompositeConstructF64x2(EmitContext& ctx);
174void EmitCompositeConstructF64x3(EmitContext& ctx);
175void EmitCompositeConstructF64x4(EmitContext& ctx);
176void EmitCompositeExtractF64x2(EmitContext& ctx);
177void EmitCompositeExtractF64x3(EmitContext& ctx);
178void EmitCompositeExtractF64x4(EmitContext& ctx);
179void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index);
180void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index);
181void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index);
182void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
183 ScalarS32 false_value);
184void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
185void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
186void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
187 ScalarS32 false_value);
188void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
189 Register false_value);
190void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
191void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
192 ScalarS32 false_value);
193void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
194void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
195void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
196void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
197void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
198void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
199void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
200void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
201void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
202void EmitPackFloat2x16(EmitContext& ctx, Register value);
203void EmitUnpackFloat2x16(EmitContext& ctx, Register value);
204void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
205void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
206void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
207void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
208void EmitGetZeroFromOp(EmitContext& ctx);
209void EmitGetSignFromOp(EmitContext& ctx);
210void EmitGetCarryFromOp(EmitContext& ctx);
211void EmitGetOverflowFromOp(EmitContext& ctx);
212void EmitGetSparseFromOp(EmitContext& ctx);
213void EmitGetInBoundsFromOp(EmitContext& ctx);
214void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value);
215void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
216void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
217void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
218void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
219void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
220void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c);
221void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c);
222void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c);
223void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
224void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
225void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
226void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
227void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
228void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
229void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
230void EmitFPNeg16(EmitContext& ctx, Register value);
231void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value);
232void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value);
233void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
234void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
235void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
236void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
237void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
238void EmitFPRecip64(EmitContext& ctx, Register value);
239void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
240void EmitFPRecipSqrt64(EmitContext& ctx, Register value);
241void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
242void EmitFPSaturate16(EmitContext& ctx, Register value);
243void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
244void EmitFPSaturate64(EmitContext& ctx, Register value);
245void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value);
246void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
247 ScalarF32 max_value);
248void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
249 ScalarF64 max_value);
250void EmitFPRoundEven16(EmitContext& ctx, Register value);
251void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
252void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
253void EmitFPFloor16(EmitContext& ctx, Register value);
254void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
255void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
256void EmitFPCeil16(EmitContext& ctx, Register value);
257void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
258void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
259void EmitFPTrunc16(EmitContext& ctx, Register value);
260void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
261void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
262void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs);
263void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
264void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
265void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs);
266void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
267void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
268void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
269void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
270void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
271void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
272void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
273void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
274void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs);
275void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
276void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
277void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs);
278void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
279void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
280void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
281void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
282void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
283void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
284void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
285void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
286void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
287void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
288void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
289void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
290void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
291void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
292void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
293void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
294void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
295void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
296void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
297void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
298void EmitFPIsNan16(EmitContext& ctx, Register value);
299void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
300void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
301void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
302void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
303void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
304void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
305void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
306void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
307void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
308void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
309void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
310void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift);
311void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
312void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
313 ScalarU32 shift);
314void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift);
315void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
316 ScalarS32 shift);
317void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
318void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
319void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
320void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
321 ScalarS32 offset, ScalarS32 count);
322void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
323 ScalarS32 count);
324void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
325 ScalarU32 count);
326void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
327void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
328void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
329void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
330void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
331void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
332void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
333void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
334void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
335void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max);
336void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max);
337void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
338void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
339void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
340void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
341void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
342void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
343void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
344void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
345void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
346void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
347void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
348 ScalarU32 value);
349void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
350 ScalarS32 value);
351void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
352 ScalarU32 value);
353void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
354 ScalarS32 value);
355void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
356 ScalarU32 value);
357void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
358 ScalarU32 value);
359void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
360 ScalarU32 value);
361void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
362 ScalarU32 value);
363void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
364 ScalarU32 value);
365void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
366 ScalarU32 value);
367void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
368 ScalarU32 value);
369void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
370 Register value);
371void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
372 ScalarU32 offset, ScalarU32 value);
373void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
374 ScalarU32 offset, ScalarS32 value);
375void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
376 ScalarU32 offset, ScalarU32 value);
377void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
378 ScalarU32 offset, ScalarS32 value);
379void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
380 ScalarU32 offset, ScalarU32 value);
381void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
382 ScalarU32 offset, ScalarU32 value);
383void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
384 ScalarU32 offset, ScalarU32 value);
385void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
386 ScalarU32 offset, ScalarU32 value);
387void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
388 ScalarU32 offset, ScalarU32 value);
389void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
390 ScalarU32 offset, ScalarU32 value);
391void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
392 ScalarU32 offset, ScalarU32 value);
393void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
394 ScalarU32 offset, Register value);
395void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
396 ScalarU32 offset, Register value);
397void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
398 ScalarU32 offset, Register value);
399void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
400 ScalarU32 offset, Register value);
401void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
402 ScalarU32 offset, Register value);
403void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
404 ScalarU32 offset, Register value);
405void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
406 ScalarU32 offset, Register value);
407void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
408 ScalarU32 offset, Register value);
409void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
410 ScalarU32 offset, Register value);
411void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
412 ScalarU32 offset, ScalarF32 value);
413void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
414 ScalarU32 offset, Register value);
415void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
416 ScalarU32 offset, Register value);
417void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
418 ScalarU32 offset, Register value);
419void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
420 ScalarU32 offset, Register value);
421void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
422 ScalarU32 offset, Register value);
423void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
424 ScalarU32 offset, Register value);
425void EmitGlobalAtomicIAdd32(EmitContext& ctx);
426void EmitGlobalAtomicSMin32(EmitContext& ctx);
427void EmitGlobalAtomicUMin32(EmitContext& ctx);
428void EmitGlobalAtomicSMax32(EmitContext& ctx);
429void EmitGlobalAtomicUMax32(EmitContext& ctx);
430void EmitGlobalAtomicInc32(EmitContext& ctx);
431void EmitGlobalAtomicDec32(EmitContext& ctx);
432void EmitGlobalAtomicAnd32(EmitContext& ctx);
433void EmitGlobalAtomicOr32(EmitContext& ctx);
434void EmitGlobalAtomicXor32(EmitContext& ctx);
435void EmitGlobalAtomicExchange32(EmitContext& ctx);
436void EmitGlobalAtomicIAdd64(EmitContext& ctx);
437void EmitGlobalAtomicSMin64(EmitContext& ctx);
438void EmitGlobalAtomicUMin64(EmitContext& ctx);
439void EmitGlobalAtomicSMax64(EmitContext& ctx);
440void EmitGlobalAtomicUMax64(EmitContext& ctx);
441void EmitGlobalAtomicInc64(EmitContext& ctx);
442void EmitGlobalAtomicDec64(EmitContext& ctx);
443void EmitGlobalAtomicAnd64(EmitContext& ctx);
444void EmitGlobalAtomicOr64(EmitContext& ctx);
445void EmitGlobalAtomicXor64(EmitContext& ctx);
446void EmitGlobalAtomicExchange64(EmitContext& ctx);
447void EmitGlobalAtomicAddF32(EmitContext& ctx);
448void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
449void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
450void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
451void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
452void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
453void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
454void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
455void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
456void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
457void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
458void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value);
459void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
460void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
461void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value);
462void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
463void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
464void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value);
465void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
466void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
467void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value);
468void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
469void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
470void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value);
471void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
472void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
473void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value);
474void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
475void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
476void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
477void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value);
478void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
479void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value);
480void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
481void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
482void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value);
483void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value);
484void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
485void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value);
486void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value);
487void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value);
488void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
489void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value);
490void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value);
491void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value);
492void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
493void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value);
494void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value);
495void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value);
496void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
497void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value);
498void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value);
499void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value);
500void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
501void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value);
502void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value);
503void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value);
504void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
505void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value);
506void EmitBindlessImageSampleImplicitLod(EmitContext&);
507void EmitBindlessImageSampleExplicitLod(EmitContext&);
508void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
509void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
510void EmitBindlessImageGather(EmitContext&);
511void EmitBindlessImageGatherDref(EmitContext&);
512void EmitBindlessImageFetch(EmitContext&);
513void EmitBindlessImageQueryDimensions(EmitContext&);
514void EmitBindlessImageQueryLod(EmitContext&);
515void EmitBindlessImageGradient(EmitContext&);
516void EmitBindlessImageRead(EmitContext&);
517void EmitBindlessImageWrite(EmitContext&);
518void EmitBoundImageSampleImplicitLod(EmitContext&);
519void EmitBoundImageSampleExplicitLod(EmitContext&);
520void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
521void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
522void EmitBoundImageGather(EmitContext&);
523void EmitBoundImageGatherDref(EmitContext&);
524void EmitBoundImageFetch(EmitContext&);
525void EmitBoundImageQueryDimensions(EmitContext&);
526void EmitBoundImageQueryLod(EmitContext&);
527void EmitBoundImageGradient(EmitContext&);
528void EmitBoundImageRead(EmitContext&);
529void EmitBoundImageWrite(EmitContext&);
530void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
531 const IR::Value& coord, Register bias_lc, const IR::Value& offset);
532void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
533 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset);
534void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
535 const IR::Value& coord, const IR::Value& dref,
536 const IR::Value& bias_lc, const IR::Value& offset);
537void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
538 const IR::Value& coord, const IR::Value& dref,
539 const IR::Value& lod, const IR::Value& offset);
540void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
541 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2);
542void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
543 const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
544 const IR::Value& dref);
545void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
546 const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
547void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
548 ScalarS32 lod);
549void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
550void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
551 const IR::Value& coord, const IR::Value& derivatives,
552 const IR::Value& offset, const IR::Value& lod_clamp);
553void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
554void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
555 Register color);
556void EmitBindlessImageAtomicIAdd32(EmitContext&);
557void EmitBindlessImageAtomicSMin32(EmitContext&);
558void EmitBindlessImageAtomicUMin32(EmitContext&);
559void EmitBindlessImageAtomicSMax32(EmitContext&);
560void EmitBindlessImageAtomicUMax32(EmitContext&);
561void EmitBindlessImageAtomicInc32(EmitContext&);
562void EmitBindlessImageAtomicDec32(EmitContext&);
563void EmitBindlessImageAtomicAnd32(EmitContext&);
564void EmitBindlessImageAtomicOr32(EmitContext&);
565void EmitBindlessImageAtomicXor32(EmitContext&);
566void EmitBindlessImageAtomicExchange32(EmitContext&);
567void EmitBoundImageAtomicIAdd32(EmitContext&);
568void EmitBoundImageAtomicSMin32(EmitContext&);
569void EmitBoundImageAtomicUMin32(EmitContext&);
570void EmitBoundImageAtomicSMax32(EmitContext&);
571void EmitBoundImageAtomicUMax32(EmitContext&);
572void EmitBoundImageAtomicInc32(EmitContext&);
573void EmitBoundImageAtomicDec32(EmitContext&);
574void EmitBoundImageAtomicAnd32(EmitContext&);
575void EmitBoundImageAtomicOr32(EmitContext&);
576void EmitBoundImageAtomicXor32(EmitContext&);
577void EmitBoundImageAtomicExchange32(EmitContext&);
578void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
579 ScalarU32 value);
580void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
581 ScalarS32 value);
582void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
583 ScalarU32 value);
584void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
585 ScalarS32 value);
586void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
587 ScalarU32 value);
588void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
589 ScalarU32 value);
590void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
591 ScalarU32 value);
592void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
593 ScalarU32 value);
594void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
595 ScalarU32 value);
596void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
597 ScalarU32 value);
598void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
599 Register coord, ScalarU32 value);
600void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
601void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
602void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
603void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
604void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
605void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
606void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
607void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
608void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
609void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
610void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
611 const IR::Value& clamp, const IR::Value& segmentation_mask);
612void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
613 const IR::Value& clamp, const IR::Value& segmentation_mask);
614void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
615 const IR::Value& clamp, const IR::Value& segmentation_mask);
616void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
617 const IR::Value& clamp, const IR::Value& segmentation_mask);
618void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
619 ScalarU32 swizzle);
620void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
621void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
622void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
623void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
624
625} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
new file mode 100644
index 000000000..f55c26b76
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -0,0 +1,294 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLASM {
10namespace {
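// Shared helper for AND/OR/XOR: any GetZeroFromOp/GetSignFromOp pseudo-instructions folded into
// the logical op are invalidated here and then re-emitted with SEQ/SLT on the result register.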
11void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b,
12 std::string_view lop) {
13 const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
14 const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
15 if (zero) {
16 zero->Invalidate();
17 }
18 if (sign) {
19 sign->Invalidate();
20 }
21 if (zero || sign) {
22 ctx.reg_alloc.InvalidateConditionCodes();
23 }
24 const auto ret{ctx.reg_alloc.Define(inst)};
25 ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b);
26 if (zero) {
27 ctx.Add("SEQ.S {},{},0;", *zero, ret);
28 }
29 if (sign) {
30 ctx.Add("SLT.S {},{},0;", *sign, ret);
31 }
32}
33} // Anonymous namespace
34
35void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
36 const std::array flags{
37 inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
38 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
39 inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
40 inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
41 };
42 for (IR::Inst* const flag_inst : flags) {
43 if (flag_inst) {
44 flag_inst->Invalidate();
45 }
46 }
47 const bool cc{inst.HasAssociatedPseudoOperation()};
48 const std::string_view cc_mod{cc ? ".CC" : ""};
49 if (cc) {
50 ctx.reg_alloc.InvalidateConditionCodes();
51 }
52 const auto ret{ctx.reg_alloc.Define(inst)};
53 ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b);
54 if (!cc) {
55 return;
56 }
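    // Index 0 is the zero flag, computed with SEQ against the result; SF/CF/OF are read back from
    // the condition-code state that ADD.S.CC just updated.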
57 static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
58 for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
59 if (!flags[flag_index]) {
60 continue;
61 }
62 const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
63 if (flag_index == 0) {
64 ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
65 } else {
66 // We could use conditional execution here, but it's broken on Nvidia's compiler
67 ctx.Add("IF {}.x;"
68 "MOV.S {}.x,-1;"
69 "ELSE;"
70 "MOV.S {}.x,0;"
71 "ENDIF;",
72 masks[flag_index], flag_ret, flag_ret);
73 }
74 }
75}
76
77void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
78 ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b);
79}
80
81void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
82 ctx.Add("SUB.S {}.x,{},{};", inst, a, b);
83}
84
85void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
86 ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b);
87}
88
89void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
90 ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
91}
92
93void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
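    // Fold the sign into negative immediates so the emitted text does not contain a double minus.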
94 if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
95 ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
96 } else {
97 ctx.Add("MOV.S {},-{};", inst, value);
98 }
99}
100
101void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
102 ctx.LongAdd("MOV.S64 {},-{};", inst, value);
103}
104
105void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
106 ctx.Add("ABS.S {},{};", inst, value);
107}
108
109void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
110 ctx.Add("SHL.U {}.x,{},{};", inst, base, shift);
111}
112
113void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
114 ScalarU32 shift) {
115 ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift);
116}
117
118void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
119 ctx.Add("SHR.U {}.x,{},{};", inst, base, shift);
120}
121
122void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
123 ScalarU32 shift) {
124 ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift);
125}
126
127void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) {
128 ctx.Add("SHR.S {}.x,{},{};", inst, base, shift);
129}
130
131void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
132 ScalarS32 shift) {
133 ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift);
134}
135
136void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
137 BitwiseLogicalOp(ctx, inst, a, b, "AND");
138}
139
140void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
141 BitwiseLogicalOp(ctx, inst, a, b, "OR");
142}
143
144void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
145 BitwiseLogicalOp(ctx, inst, a, b, "XOR");
146}
147
148void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
149 ScalarS32 offset, ScalarS32 count) {
150 const Register ret{ctx.reg_alloc.Define(inst)};
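    // With immediate count/offset the bitfield descriptor can be packed into a vector literal;
    // otherwise the two components are staged in the RC scratch register first.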
151 if (count.type != Type::Register && offset.type != Type::Register) {
152 ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
153 } else {
154 ctx.Add("MOV.S RC.x,{};"
155 "MOV.S RC.y,{};"
156 "BFI.S {},RC,{},{};",
157 count, offset, ret, insert, base);
158 }
159}
160
161void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
162 ScalarS32 count) {
163 const Register ret{ctx.reg_alloc.Define(inst)};
164 if (count.type != Type::Register && offset.type != Type::Register) {
165 ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
166 } else {
167 ctx.Add("MOV.S RC.x,{};"
168 "MOV.S RC.y,{};"
169 "BFE.S {},RC,{};",
170 count, offset, ret, base);
171 }
172}
173
174void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
175 ScalarU32 count) {
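    // As with the bitwise logical ops, zero/sign pseudo-operations are folded here and derived
    // from the extracted result.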
176 const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
177 const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
178 if (zero) {
179 zero->Invalidate();
180 }
181 if (sign) {
182 sign->Invalidate();
183 }
184 if (zero || sign) {
185 ctx.reg_alloc.InvalidateConditionCodes();
186 }
187 const Register ret{ctx.reg_alloc.Define(inst)};
188 if (count.type != Type::Register && offset.type != Type::Register) {
189 ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
190 } else {
191 ctx.Add("MOV.U RC.x,{};"
192 "MOV.U RC.y,{};"
193 "BFE.U {},RC,{};",
194 count, offset, ret, base);
195 }
196 if (zero) {
197 ctx.Add("SEQ.S {},{},0;", *zero, ret);
198 }
199 if (sign) {
200 ctx.Add("SLT.S {},{},0;", *sign, ret);
201 }
202}
203
204void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
205 ctx.Add("BFR {},{};", inst, value);
206}
207
208void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
209 ctx.Add("BTC {},{};", inst, value);
210}
211
212void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
213 ctx.Add("NOT.S {},{};", inst, value);
214}
215
216void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
217 ctx.Add("BTFM.S {},{};", inst, value);
218}
219
220void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
221 ctx.Add("BTFM.U {},{};", inst, value);
222}
223
224void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
225 ctx.Add("MIN.S {},{},{};", inst, a, b);
226}
227
228void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
229 ctx.Add("MIN.U {},{},{};", inst, a, b);
230}
231
232void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
233 ctx.Add("MAX.S {},{},{};", inst, a, b);
234}
235
236void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
237 ctx.Add("MAX.U {},{},{};", inst, a, b);
238}
239
240void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
241 const Register ret{ctx.reg_alloc.Define(inst)};
242 ctx.Add("MIN.S RC.x,{},{};"
243 "MAX.S {}.x,RC.x,{};",
244 max, value, ret, min);
245}
246
247void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) {
248 const Register ret{ctx.reg_alloc.Define(inst)};
249 ctx.Add("MIN.U RC.x,{},{};"
250 "MAX.U {}.x,RC.x,{};",
251 max, value, ret, min);
252}
253
254void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
255 ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs);
256}
257
258void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
259 ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs);
260}
261
262void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
263 ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs);
264}
265
266void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
267 ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs);
268}
269
270void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
271 ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs);
272}
273
274void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
275 ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs);
276}
277
278void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
279 ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs);
280}
281
282void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
283 ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs);
284}
285
286void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
287 ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs);
288}
289
290void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
291 ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs);
292}
293
294} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
new file mode 100644
index 000000000..af9fac7c1
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -0,0 +1,568 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/runtime_info.h"
12
13namespace Shader::Backend::GLASM {
14namespace {
15void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
16 std::string_view then_expr, std::string_view else_expr = {}) {
17 // Operate on a bindless SSBO, invoking the expression with bounds checking
18 // address = c[binding].xy
19 // length = c[binding].z
20 const u32 sb_binding{binding.U32()};
21 ctx.Add("PK64.U DC,c[{}];" // pointer = address
22 "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
23 "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
24 "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
25 sb_binding, offset, offset, sb_binding);
26 if (else_expr.empty()) {
27 ctx.Add("IF NE.x;{}ENDIF;", then_expr);
28 } else {
29 ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
30 }
31}
32
33void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
34 std::string_view else_expr = {}) {
35 const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
36 for (size_t index = 0; index < num_buffers; ++index) {
37 if (!ctx.info.nvn_buffer_used[index]) {
38 continue;
39 }
40 const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
41 ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
42 "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
43 "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
44 "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
45 "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0
46 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0
47 "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
48 "IF NE.x;" // if cond
49 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
50 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
51 address, address);
52 if (pointer_based) {
53 ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
54 "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
55 "{}"
56 "ELSE;",
57 index, expr);
58 } else {
59 ctx.Add("CVT.U32.U64 RC.x,DC.x;"
60 "{},ssbo{}[RC.x];"
61 "ELSE;",
62 expr, index);
63 }
64 }
65 if (!else_expr.empty()) {
66 ctx.Add("{}", else_expr);
67 }
68 const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()};
69 for (size_t index = 0; index < num_used_buffers; ++index) {
70 ctx.Add("ENDIF;");
71 }
72}
73
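// Storage reads/writes use native LDB/STB instructions when the runtime exposes storage buffers;
// otherwise they fall back to the bounds-checked global-memory path, with out-of-bounds loads
// returning zero.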
74template <typename ValueType>
75void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
76 std::string_view size) {
77 if (ctx.runtime_info.glasm_use_storage_buffers) {
78 ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
79 } else {
80 StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
81 }
82}
83
84void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
85 std::string_view size) {
86 const Register ret{ctx.reg_alloc.Define(inst)};
87 if (ctx.runtime_info.glasm_use_storage_buffers) {
88 ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
89 } else {
90 StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
91 fmt::format("MOV.U {},{{0,0,0,0}};", ret));
92 }
93}
94
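// Global accesses resolve a raw 64-bit address against every potentially used SSBO range; the
// matching buffer is then accessed either through its ssboN binding or through a host pointer.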
95template <typename ValueType>
96void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
97 if (ctx.runtime_info.glasm_use_storage_buffers) {
98 GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
99 } else {
100 GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
101 }
102}
103
104void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
105 const Register ret{ctx.reg_alloc.Define(inst)};
106 if (ctx.runtime_info.glasm_use_storage_buffers) {
107 GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
108 } else {
109 GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
110 fmt::format("MOV.S {},0;", ret));
111 }
112}
113
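// ATOMB targets the bound storage buffer directly; the fallback ATOM operates on the
// bounds-checked pointer computed by StorageOp.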
114template <typename ValueType>
115void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
116 ValueType value, std::string_view operation, std::string_view size) {
117 const Register ret{ctx.reg_alloc.Define(inst)};
118 if (ctx.runtime_info.glasm_use_storage_buffers) {
119 ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
120 offset);
121 } else {
122 StorageOp(ctx, binding, offset,
123 fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
124 }
125}
126} // Anonymous namespace
127
128void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
129 GlobalLoad(ctx, inst, address, "U8");
130}
131
132void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
133 GlobalLoad(ctx, inst, address, "S8");
134}
135
136void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
137 GlobalLoad(ctx, inst, address, "U16");
138}
139
140void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
141 GlobalLoad(ctx, inst, address, "S16");
142}
143
144void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
145 GlobalLoad(ctx, inst, address, "U32");
146}
147
148void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
149 GlobalLoad(ctx, inst, address, "U32X2");
150}
151
152void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) {
153 GlobalLoad(ctx, inst, address, "U32X4");
154}
155
156void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) {
157 GlobalWrite(ctx, address, value, "U8");
158}
159
160void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) {
161 GlobalWrite(ctx, address, value, "S8");
162}
163
164void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) {
165 GlobalWrite(ctx, address, value, "U16");
166}
167
168void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) {
169 GlobalWrite(ctx, address, value, "S16");
170}
171
172void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) {
173 GlobalWrite(ctx, address, value, "U32");
174}
175
176void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) {
177 GlobalWrite(ctx, address, value, "U32X2");
178}
179
180void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) {
181 GlobalWrite(ctx, address, value, "U32X4");
182}
183
184void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
185 ScalarU32 offset) {
186 Load(ctx, inst, binding, offset, "U8");
187}
188
189void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
190 ScalarU32 offset) {
191 Load(ctx, inst, binding, offset, "S8");
192}
193
194void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
195 ScalarU32 offset) {
196 Load(ctx, inst, binding, offset, "U16");
197}
198
199void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
200 ScalarU32 offset) {
201 Load(ctx, inst, binding, offset, "S16");
202}
203
204void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
205 ScalarU32 offset) {
206 Load(ctx, inst, binding, offset, "U32");
207}
208
209void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
210 ScalarU32 offset) {
211 Load(ctx, inst, binding, offset, "U32X2");
212}
213
214void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
215 ScalarU32 offset) {
216 Load(ctx, inst, binding, offset, "U32X4");
217}
218
219void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
220 ScalarU32 value) {
221 Write(ctx, binding, offset, value, "U8");
222}
223
224void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
225 ScalarS32 value) {
226 Write(ctx, binding, offset, value, "S8");
227}
228
229void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
230 ScalarU32 value) {
231 Write(ctx, binding, offset, value, "U16");
232}
233
234void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
235 ScalarS32 value) {
236 Write(ctx, binding, offset, value, "S16");
237}
238
239void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
240 ScalarU32 value) {
241 Write(ctx, binding, offset, value, "U32");
242}
243
244void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
245 Register value) {
246 Write(ctx, binding, offset, value, "U32X2");
247}
248
249void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
250 Register value) {
251 Write(ctx, binding, offset, value, "U32X4");
252}
253
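// Shared-memory atomics map one-to-one onto ATOMS.* operations on the shared_mem array.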
254void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
255 ScalarU32 value) {
256 ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
257}
258
259void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
260 ScalarS32 value) {
261 ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
262}
263
264void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
265 ScalarU32 value) {
266 ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
267}
268
269void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
270 ScalarS32 value) {
271 ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
272}
273
274void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
275 ScalarU32 value) {
276 ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
277}
278
279void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
280 ScalarU32 value) {
281 ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
282}
283
284void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
285 ScalarU32 value) {
286 ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
287}
288
289void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
290 ScalarU32 value) {
291 ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
292}
293
294void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
295 ScalarU32 value) {
296 ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
297}
298
299void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
300 ScalarU32 value) {
301 ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
302}
303
304void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
305 ScalarU32 value) {
306 ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
307}
308
309void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
310 Register value) {
311 ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
312}
313
314void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
315 ScalarU32 offset, ScalarU32 value) {
316 Atom(ctx, inst, binding, offset, value, "ADD", "U32");
317}
318
319void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
320 ScalarU32 offset, ScalarS32 value) {
321 Atom(ctx, inst, binding, offset, value, "MIN", "S32");
322}
323
324void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
325 ScalarU32 offset, ScalarU32 value) {
326 Atom(ctx, inst, binding, offset, value, "MIN", "U32");
327}
328
329void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
330 ScalarU32 offset, ScalarS32 value) {
331 Atom(ctx, inst, binding, offset, value, "MAX", "S32");
332}
333
334void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
335 ScalarU32 offset, ScalarU32 value) {
336 Atom(ctx, inst, binding, offset, value, "MAX", "U32");
337}
338
339void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
340 ScalarU32 offset, ScalarU32 value) {
341 Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
342}
343
344void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
345 ScalarU32 offset, ScalarU32 value) {
346 Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
347}
348
349void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
350 ScalarU32 offset, ScalarU32 value) {
351 Atom(ctx, inst, binding, offset, value, "AND", "U32");
352}
353
354void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
355 ScalarU32 offset, ScalarU32 value) {
356 Atom(ctx, inst, binding, offset, value, "OR", "U32");
357}
358
359void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
360 ScalarU32 offset, ScalarU32 value) {
361 Atom(ctx, inst, binding, offset, value, "XOR", "U32");
362}
363
364void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
365 ScalarU32 offset, ScalarU32 value) {
366 Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
367}
368
369void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
370 ScalarU32 offset, Register value) {
371 Atom(ctx, inst, binding, offset, value, "ADD", "U64");
372}
373
374void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
375 ScalarU32 offset, Register value) {
376 Atom(ctx, inst, binding, offset, value, "MIN", "S64");
377}
378
379void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
380 ScalarU32 offset, Register value) {
381 Atom(ctx, inst, binding, offset, value, "MIN", "U64");
382}
383
384void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
385 ScalarU32 offset, Register value) {
386 Atom(ctx, inst, binding, offset, value, "MAX", "S64");
387}
388
389void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
390 ScalarU32 offset, Register value) {
391 Atom(ctx, inst, binding, offset, value, "MAX", "U64");
392}
393
394void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
395 ScalarU32 offset, Register value) {
396 Atom(ctx, inst, binding, offset, value, "AND", "U64");
397}
398
399void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
400 ScalarU32 offset, Register value) {
401 Atom(ctx, inst, binding, offset, value, "OR", "U64");
402}
403
404void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
405 ScalarU32 offset, Register value) {
406 Atom(ctx, inst, binding, offset, value, "XOR", "U64");
407}
408
409void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
410 ScalarU32 offset, Register value) {
411 Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
412}
413
414void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
415 ScalarU32 offset, ScalarF32 value) {
416 Atom(ctx, inst, binding, offset, value, "ADD", "F32");
417}
418
419void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
420 ScalarU32 offset, Register value) {
421 Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
422}
423
424void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
425 [[maybe_unused]] const IR::Value& binding,
426 [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
427 throw NotImplementedException("GLASM instruction");
428}
429
430void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
431 ScalarU32 offset, Register value) {
432 Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
433}
434
435void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
436 [[maybe_unused]] const IR::Value& binding,
437 [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
438 throw NotImplementedException("GLASM instruction");
439}
440
441void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
442 ScalarU32 offset, Register value) {
443 Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
444}
445
446void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
447 [[maybe_unused]] const IR::Value& binding,
448 [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
449 throw NotImplementedException("GLASM instruction");
450}
451
452void EmitGlobalAtomicIAdd32(EmitContext&) {
453 throw NotImplementedException("GLASM instruction");
454}
455
456void EmitGlobalAtomicSMin32(EmitContext&) {
457 throw NotImplementedException("GLASM instruction");
458}
459
460void EmitGlobalAtomicUMin32(EmitContext&) {
461 throw NotImplementedException("GLASM instruction");
462}
463
464void EmitGlobalAtomicSMax32(EmitContext&) {
465 throw NotImplementedException("GLASM instruction");
466}
467
468void EmitGlobalAtomicUMax32(EmitContext&) {
469 throw NotImplementedException("GLASM instruction");
470}
471
472void EmitGlobalAtomicInc32(EmitContext&) {
473 throw NotImplementedException("GLASM instruction");
474}
475
476void EmitGlobalAtomicDec32(EmitContext&) {
477 throw NotImplementedException("GLASM instruction");
478}
479
480void EmitGlobalAtomicAnd32(EmitContext&) {
481 throw NotImplementedException("GLASM instruction");
482}
483
484void EmitGlobalAtomicOr32(EmitContext&) {
485 throw NotImplementedException("GLASM instruction");
486}
487
488void EmitGlobalAtomicXor32(EmitContext&) {
489 throw NotImplementedException("GLASM instruction");
490}
491
492void EmitGlobalAtomicExchange32(EmitContext&) {
493 throw NotImplementedException("GLASM instruction");
494}
495
496void EmitGlobalAtomicIAdd64(EmitContext&) {
497 throw NotImplementedException("GLASM instruction");
498}
499
500void EmitGlobalAtomicSMin64(EmitContext&) {
501 throw NotImplementedException("GLASM instruction");
502}
503
504void EmitGlobalAtomicUMin64(EmitContext&) {
505 throw NotImplementedException("GLASM instruction");
506}
507
508void EmitGlobalAtomicSMax64(EmitContext&) {
509 throw NotImplementedException("GLASM instruction");
510}
511
512void EmitGlobalAtomicUMax64(EmitContext&) {
513 throw NotImplementedException("GLASM instruction");
514}
515
516void EmitGlobalAtomicInc64(EmitContext&) {
517 throw NotImplementedException("GLASM instruction");
518}
519
520void EmitGlobalAtomicDec64(EmitContext&) {
521 throw NotImplementedException("GLASM instruction");
522}
523
524void EmitGlobalAtomicAnd64(EmitContext&) {
525 throw NotImplementedException("GLASM instruction");
526}
527
528void EmitGlobalAtomicOr64(EmitContext&) {
529 throw NotImplementedException("GLASM instruction");
530}
531
532void EmitGlobalAtomicXor64(EmitContext&) {
533 throw NotImplementedException("GLASM instruction");
534}
535
536void EmitGlobalAtomicExchange64(EmitContext&) {
537 throw NotImplementedException("GLASM instruction");
538}
539
540void EmitGlobalAtomicAddF32(EmitContext&) {
541 throw NotImplementedException("GLASM instruction");
542}
543
544void EmitGlobalAtomicAddF16x2(EmitContext&) {
545 throw NotImplementedException("GLASM instruction");
546}
547
548void EmitGlobalAtomicAddF32x2(EmitContext&) {
549 throw NotImplementedException("GLASM instruction");
550}
551
552void EmitGlobalAtomicMinF16x2(EmitContext&) {
553 throw NotImplementedException("GLASM instruction");
554}
555
556void EmitGlobalAtomicMinF32x2(EmitContext&) {
557 throw NotImplementedException("GLASM instruction");
558}
559
560void EmitGlobalAtomicMaxF16x2(EmitContext&) {
561 throw NotImplementedException("GLASM instruction");
562}
563
564void EmitGlobalAtomicMaxF32x2(EmitContext&) {
565 throw NotImplementedException("GLASM instruction");
566}
567
568} // namespace Shader::Backend::GLASM
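Editor's note: every emitter in this file is a thin dispatch. The load/store/atomic wrappers forward a GLASM size suffix (U8 through U32X4) or an op/type pair to the Load, Write, GlobalWrite and Atom helpers defined earlier in the file, while the shared-memory atomics inline the ATOMS form directly. As a worked example (register numbers and the immediate offset are illustrative), EmitSharedAtomicIAdd32 with an offset of 8 emits something like:

// ATOMS.ADD.U32 R0,R1.x,shared_mem[8];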
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
new file mode 100644
index 000000000..ff64c6924
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -0,0 +1,273 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glasm/emit_context.h"
8#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12#ifdef _MSC_VER
13#pragma warning(disable : 4100)
14#endif
15
16namespace Shader::Backend::GLASM {
17
18#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
19
20static void DefinePhi(EmitContext& ctx, IR::Inst& phi) {
21 switch (phi.Arg(0).Type()) {
22 case IR::Type::U1:
23 case IR::Type::U32:
24 case IR::Type::F32:
25 ctx.reg_alloc.Define(phi);
26 break;
27 case IR::Type::U64:
28 case IR::Type::F64:
29 ctx.reg_alloc.LongDefine(phi);
30 break;
31 default:
32 throw NotImplementedException("Phi node type {}", phi.Type());
33 }
34}
35
36void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
37 const size_t num_args{phi.NumArgs()};
38 for (size_t i = 0; i < num_args; ++i) {
39 ctx.reg_alloc.Consume(phi.Arg(i));
40 }
41 if (!phi.Definition<Id>().is_valid) {
42 // The phi node wasn't forward defined
43 DefinePhi(ctx, phi);
44 }
45}
46
47void EmitVoid(EmitContext&) {}
48
49void EmitReference(EmitContext& ctx, const IR::Value& value) {
50 ctx.reg_alloc.Consume(value);
51}
52
53void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
54 IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())};
55 if (!phi.Definition<Id>().is_valid) {
56 // The phi node wasn't forward defined
57 DefinePhi(ctx, phi);
58 }
59 const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})};
60 const Value eval_value{ctx.reg_alloc.Consume(value)};
61
62 if (phi_reg == eval_value) {
63 return;
64 }
65 switch (phi.Flags<IR::Type>()) {
66 case IR::Type::U1:
67 case IR::Type::U32:
68 case IR::Type::F32:
69 ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
70 break;
71 case IR::Type::U64:
72 case IR::Type::F64:
73 ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
74 break;
75 default:
76 throw NotImplementedException("Phi node type {}", phi.Type());
77 }
78}
79
80void EmitJoin(EmitContext& ctx) {
81 NotImplemented();
82}
83
84void EmitDemoteToHelperInvocation(EmitContext& ctx) {
85 ctx.Add("KIL TR.x;");
86}
87
88void EmitBarrier(EmitContext& ctx) {
89 ctx.Add("BAR;");
90}
91
92void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
93 ctx.Add("MEMBAR.CTA;");
94}
95
96void EmitDeviceMemoryBarrier(EmitContext& ctx) {
97 ctx.Add("MEMBAR;");
98}
99
100void EmitPrologue(EmitContext& ctx) {
101 // TODO
102}
103
104void EmitEpilogue(EmitContext& ctx) {
105 // TODO
106}
107
108void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
109 if (stream.type == Type::U32 && stream.imm_u32 == 0) {
110 ctx.Add("EMIT;");
111 } else {
112 ctx.Add("EMITS {};", stream);
113 }
114}
115
116void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
117 if (!stream.IsImmediate()) {
118 LOG_WARNING(Shader_GLASM, "Stream is not immediate");
119 }
120 ctx.reg_alloc.Consume(stream);
121 ctx.Add("ENDPRIM;");
122}
123
124void EmitGetRegister(EmitContext& ctx) {
125 NotImplemented();
126}
127
128void EmitSetRegister(EmitContext& ctx) {
129 NotImplemented();
130}
131
132void EmitGetPred(EmitContext& ctx) {
133 NotImplemented();
134}
135
136void EmitSetPred(EmitContext& ctx) {
137 NotImplemented();
138}
139
140void EmitSetGotoVariable(EmitContext& ctx) {
141 NotImplemented();
142}
143
144void EmitGetGotoVariable(EmitContext& ctx) {
145 NotImplemented();
146}
147
148void EmitSetIndirectBranchVariable(EmitContext& ctx) {
149 NotImplemented();
150}
151
152void EmitGetIndirectBranchVariable(EmitContext& ctx) {
153 NotImplemented();
154}
155
156void EmitGetZFlag(EmitContext& ctx) {
157 NotImplemented();
158}
159
160void EmitGetSFlag(EmitContext& ctx) {
161 NotImplemented();
162}
163
164void EmitGetCFlag(EmitContext& ctx) {
165 NotImplemented();
166}
167
168void EmitGetOFlag(EmitContext& ctx) {
169 NotImplemented();
170}
171
172void EmitSetZFlag(EmitContext& ctx) {
173 NotImplemented();
174}
175
176void EmitSetSFlag(EmitContext& ctx) {
177 NotImplemented();
178}
179
180void EmitSetCFlag(EmitContext& ctx) {
181 NotImplemented();
182}
183
184void EmitSetOFlag(EmitContext& ctx) {
185 NotImplemented();
186}
187
188void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
189 ctx.Add("MOV.S {},invocation.groupid;", inst);
190}
191
192void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
193 ctx.Add("MOV.S {},invocation.localid;", inst);
194}
195
196void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
197 ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst);
198}
199
200void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
201 ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst);
202}
203
204void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
205 ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst);
206}
207
208void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
209 ctx.uses_y_direction = true;
210 ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
211}
212
213void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
214 ctx.Add("MOV.S {}.x,0;", inst);
215}
216
217void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
218 ctx.Add("MOV.S {}.x,0;", inst);
219}
220
221void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
222 ctx.Add("MOV.S {}.x,0;", inst);
223}
224
225void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
226 ctx.Add("MOV.S {}.x,0;", inst);
227}
228
229void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
230 ctx.LongAdd("MOV.S64 {}.x,0;", inst);
231}
232
233void EmitGetZeroFromOp(EmitContext& ctx) {
234 NotImplemented();
235}
236
237void EmitGetSignFromOp(EmitContext& ctx) {
238 NotImplemented();
239}
240
241void EmitGetCarryFromOp(EmitContext& ctx) {
242 NotImplemented();
243}
244
245void EmitGetOverflowFromOp(EmitContext& ctx) {
246 NotImplemented();
247}
248
249void EmitGetSparseFromOp(EmitContext& ctx) {
250 NotImplemented();
251}
252
253void EmitGetInBoundsFromOp(EmitContext& ctx) {
254 NotImplemented();
255}
256
257void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
258 ctx.Add("OR.S {},{},{};", inst, a, b);
259}
260
261void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
262 ctx.Add("AND.S {},{},{};", inst, a, b);
263}
264
265void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
266 ctx.Add("XOR.S {},{},{};", inst, a, b);
267}
268
269void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
270 ctx.Add("SEQ.S {},{},0;", inst, value);
271}
272
273} // namespace Shader::Backend::GLASM
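Editor's note on the phi handling above: EmitPhiMove only emits code when the consumed value does not already live in the phi's register. With illustrative register numbers, a 32-bit phi move where the phi was defined as R4 and the incoming value resolved to R7 produces:

// MOV.S R4.x,R7.x;
// (the 64-bit case uses the long register file instead: MOV.U64 D4.x,D7.x;)

If both sides already share the same register, nothing is emitted.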
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
new file mode 100644
index 000000000..68fff613c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
@@ -0,0 +1,67 @@
1
2// Copyright 2021 yuzu Emulator Project
3// Licensed under GPLv2 or any later version
4// Refer to the license.txt file included.
5
6#include "shader_recompiler/backend/glasm/emit_context.h"
7#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
8#include "shader_recompiler/frontend/ir/value.h"
9
10namespace Shader::Backend::GLASM {
11
12void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
13 ScalarS32 false_value) {
14 ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
15}
16
17void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
18 [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
19 throw NotImplementedException("GLASM instruction");
20}
21
22void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
23 [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
24 throw NotImplementedException("GLASM instruction");
25}
26
27void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
28 ScalarS32 false_value) {
29 ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
30}
31
32void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
33 Register false_value) {
34 ctx.reg_alloc.InvalidateConditionCodes();
35 const Register ret{ctx.reg_alloc.LongDefine(inst)};
36 if (ret == true_value) {
37 ctx.Add("MOV.S.CC RC.x,{};"
38 "MOV.U64 {}.x(EQ.x),{};",
39 cond, ret, false_value);
40 } else if (ret == false_value) {
41 ctx.Add("MOV.S.CC RC.x,{};"
42 "MOV.U64 {}.x(NE.x),{};",
43 cond, ret, true_value);
44 } else {
45 ctx.Add("MOV.S.CC RC.x,{};"
46 "MOV.U64 {}.x,{};"
47 "MOV.U64 {}.x(NE.x),{};",
48 cond, ret, false_value, ret, true_value);
49 }
50}
51
52void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
53 [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
54 throw NotImplementedException("GLASM instruction");
55}
56
57void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
58 ScalarS32 false_value) {
59 ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
60}
61
62void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
63 [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
64 throw NotImplementedException("GLASM instruction");
65}
66
67} // namespace Shader::Backend::GLASM
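Editor's note: EmitSelectU64 above avoids a temporary by reusing whichever source already aliases the result register and conditionally moving the other one in. Spelled out for the general case (register names are illustrative), the emitted sequence is:

// MOV.S.CC RC.x,R3.x;        (load the condition into RC and update condition codes)
// MOV.U64 D2.x,D1.x;         (start from the false value)
// MOV.U64 D2.x(NE.x),D0.x;   (overwrite with the true value only where the condition is non-zero)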
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
new file mode 100644
index 000000000..c1498f449
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
@@ -0,0 +1,58 @@
1
2// Copyright 2021 yuzu Emulator Project
3// Licensed under GPLv2 or any later version
4// Refer to the license.txt file included.
5
6#include "shader_recompiler/backend/glasm/emit_context.h"
7#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
8#include "shader_recompiler/frontend/ir/value.h"
9
10namespace Shader::Backend::GLASM {
11void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
12 ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset);
13}
14
15void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
16 ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset);
17}
18
19void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
20 ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset);
21}
22
23void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
24 ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset);
25}
26
27void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
28 ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset);
29}
30
31void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
32 ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset);
33}
34
35void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
36 ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset);
37}
38
39void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
40 ctx.Add("STS.U8 {},shared_mem[{}];", value, offset);
41}
42
43void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
44 ctx.Add("STS.U16 {},shared_mem[{}];", value, offset);
45}
46
47void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
48 ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
49}
50
51void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
52 ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset);
53}
54
55void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) {
56 ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset);
57}
58} // namespace Shader::Backend::GLASM
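Editor's note: the shared-memory accessors are direct wrappers over the LDS/STS local-memory instructions, with the offset operand indexing shared_mem. With illustrative registers and immediate offsets, EmitLoadSharedU32 and EmitWriteSharedU64 emit something like:

// LDS.U32 R0,shared_mem[8];
// STS.U32X2 R0,shared_mem[16];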
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
new file mode 100644
index 000000000..544d475b4
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -0,0 +1,150 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glasm/emit_context.h"
6#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8#include "shader_recompiler/profile.h"
9
10namespace Shader::Backend::GLASM {
11
12void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
13 ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name);
14}
15
16void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
17 ctx.Add("TGALL.S {}.x,{};", inst, pred);
18}
19
20void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
21 ctx.Add("TGANY.S {}.x,{};", inst, pred);
22}
23
24void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
25 ctx.Add("TGEQ.S {}.x,{};", inst, pred);
26}
27
28void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
29 ctx.Add("TGBALLOT {}.x,{};", inst, pred);
30}
31
32void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
33 ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name);
34}
35
36void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
37 ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name);
38}
39
40void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
41 ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name);
42}
43
44void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
45 ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name);
46}
47
48void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
49 ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name);
50}
51
52static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
53 const IR::Value& clamp, const IR::Value& segmentation_mask,
54 std::string_view op) {
55 IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
56 if (in_bounds) {
57 in_bounds->Invalidate();
58 }
59 std::string mask;
60 if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
61 mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
62 } else {
63 mask = "RC";
64 ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};",
65 ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)},
66 ScalarU32{ctx.reg_alloc.Consume(clamp)});
67 }
68 const Register value_ret{ctx.reg_alloc.Define(inst)};
69 if (in_bounds) {
70 const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
71 ctx.Add("SHF{}.U {},{},{},{};"
72 "MOV.U {}.x,{}.y;",
73 op, bounds_ret, value, index, mask, value_ret, bounds_ret);
74 } else {
75 ctx.Add("SHF{}.U {},{},{},{};"
76 "MOV.U {}.x,{}.y;",
77 op, value_ret, value, index, mask, value_ret, value_ret);
78 }
79}
80
81void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
82 const IR::Value& clamp, const IR::Value& segmentation_mask) {
83 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX");
84}
85
86void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
87 const IR::Value& clamp, const IR::Value& segmentation_mask) {
88 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP");
89}
90
91void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
92 const IR::Value& clamp, const IR::Value& segmentation_mask) {
93 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN");
94}
95
96void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
97 const IR::Value& clamp, const IR::Value& segmentation_mask) {
98 Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
99}
100
101void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
102 ScalarU32 swizzle) {
103 const auto ret{ctx.reg_alloc.Define(inst)};
104 ctx.Add("AND.U RC.z,{}.threadid,3;"
105 "SHL.U RC.z,RC.z,1;"
106 "SHR.U RC.z,{},RC.z;"
107 "AND.U RC.z,RC.z,3;"
108 "MUL.F RC.x,{},FSWZA[RC.z];"
109 "MUL.F RC.y,{},FSWZB[RC.z];"
110 "ADD.F {}.x,RC.x,RC.y;",
111 ctx.stage_name, swizzle, op_a, op_b, ret);
112}
113
114void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
115 if (ctx.profile.support_derivative_control) {
116 ctx.Add("DDX.FINE {}.x,{};", inst, p);
117 } else {
118 LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
119 ctx.Add("DDX {}.x,{};", inst, p);
120 }
121}
122
123void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
124 if (ctx.profile.support_derivative_control) {
125 ctx.Add("DDY.FINE {}.x,{};", inst, p);
126 } else {
127 LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
128 ctx.Add("DDY {}.x,{};", inst, p);
129 }
130}
131
132void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
133 if (ctx.profile.support_derivative_control) {
134 ctx.Add("DDX.COARSE {}.x,{};", inst, p);
135 } else {
136 LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
137 ctx.Add("DDX {}.x,{};", inst, p);
138 }
139}
140
141void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
142 if (ctx.profile.support_derivative_control) {
143 ctx.Add("DDY.COARSE {}.x,{};", inst, p);
144 } else {
145 LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
146 ctx.Add("DDY {}.x,{};", inst, p);
147 }
148}
149
150} // namespace Shader::Backend::GLASM
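Editor's note: the Shuffle helper above folds the clamp and the segmentation mask into the single mask operand that the SHF* instructions expect, with the clamp in the low bits and the segmentation mask starting at bit 8 (the BFI fallback inserts 5 bits at offset 8 when either value is not an immediate). A worked example of the immediate path with illustrative registers: clamp = 31 and segmentation_mask = 0x1c give mask = 31 | (0x1c << 8) = 7199, so EmitShuffleIndex without an in-bounds pseudo-op emits something like:

// SHFIDX.U R0,R1.x,R2.x,7199;
// MOV.U R0.x,R0.y;   (the shuffled lane value is copied out of component .y)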
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
new file mode 100644
index 000000000..4c046db6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -0,0 +1,186 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "shader_recompiler/backend/glasm/emit_context.h"
10#include "shader_recompiler/backend/glasm/reg_alloc.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::Backend::GLASM {
15
16Register RegAlloc::Define(IR::Inst& inst) {
17 return Define(inst, false);
18}
19
20Register RegAlloc::LongDefine(IR::Inst& inst) {
21 return Define(inst, true);
22}
23
24Value RegAlloc::Peek(const IR::Value& value) {
25 if (value.IsImmediate()) {
26 return MakeImm(value);
27 } else {
28 return PeekInst(*value.Inst());
29 }
30}
31
32Value RegAlloc::Consume(const IR::Value& value) {
33 if (value.IsImmediate()) {
34 return MakeImm(value);
35 } else {
36 return ConsumeInst(*value.Inst());
37 }
38}
39
40void RegAlloc::Unref(IR::Inst& inst) {
41 IR::Inst& value_inst{AliasInst(inst)};
42 value_inst.DestructiveRemoveUsage();
43 if (!value_inst.HasUses()) {
44 Free(value_inst.Definition<Id>());
45 }
46}
47
48Register RegAlloc::AllocReg() {
49 Register ret;
50 ret.type = Type::Register;
51 ret.id = Alloc(false);
52 return ret;
53}
54
55Register RegAlloc::AllocLongReg() {
56 Register ret;
57 ret.type = Type::Register;
58 ret.id = Alloc(true);
59 return ret;
60}
61
62void RegAlloc::FreeReg(Register reg) {
63 Free(reg.id);
64}
65
66Value RegAlloc::MakeImm(const IR::Value& value) {
67 Value ret;
68 switch (value.Type()) {
69 case IR::Type::Void:
70 ret.type = Type::Void;
71 break;
72 case IR::Type::U1:
73 ret.type = Type::U32;
74 ret.imm_u32 = value.U1() ? 0xffffffff : 0;
75 break;
76 case IR::Type::U32:
77 ret.type = Type::U32;
78 ret.imm_u32 = value.U32();
79 break;
80 case IR::Type::F32:
81 ret.type = Type::U32;
82 ret.imm_u32 = Common::BitCast<u32>(value.F32());
83 break;
84 case IR::Type::U64:
85 ret.type = Type::U64;
86 ret.imm_u64 = value.U64();
87 break;
88 case IR::Type::F64:
89 ret.type = Type::U64;
90 ret.imm_u64 = Common::BitCast<u64>(value.F64());
91 break;
92 default:
93 throw NotImplementedException("Immediate type {}", value.Type());
94 }
95 return ret;
96}
97
98Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
99 if (inst.HasUses()) {
100 inst.SetDefinition<Id>(Alloc(is_long));
101 } else {
102 Id id{};
103 id.is_long.Assign(is_long ? 1 : 0);
104 id.is_null.Assign(1);
105 inst.SetDefinition<Id>(id);
106 }
107 return Register{PeekInst(inst)};
108}
109
110Value RegAlloc::PeekInst(IR::Inst& inst) {
111 Value ret;
112 ret.type = Type::Register;
113 ret.id = inst.Definition<Id>();
114 return ret;
115}
116
117Value RegAlloc::ConsumeInst(IR::Inst& inst) {
118 Unref(inst);
119 return PeekInst(inst);
120}
121
122Id RegAlloc::Alloc(bool is_long) {
123 size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
124 std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
125 if (num_used_registers + num_used_long_registers < NUM_REGS) {
126 for (size_t reg = 0; reg < NUM_REGS; ++reg) {
127 if (use[reg]) {
128 continue;
129 }
130 num_regs = std::max(num_regs, reg + 1);
131 use[reg] = true;
132 Id ret{};
133 ret.is_valid.Assign(1);
134 ret.is_long.Assign(is_long ? 1 : 0);
135 ret.is_spill.Assign(0);
136 ret.is_condition_code.Assign(0);
137 ret.is_null.Assign(0);
138 ret.index.Assign(static_cast<u32>(reg));
139 return ret;
140 }
141 }
142 throw NotImplementedException("Register spilling");
143}
144
145void RegAlloc::Free(Id id) {
146 if (id.is_valid == 0) {
147 throw LogicError("Freeing invalid register");
148 }
149 if (id.is_spill != 0) {
150 throw NotImplementedException("Free spill");
151 }
152 if (id.is_long != 0) {
153 long_register_use[id.index] = false;
154 } else {
155 register_use[id.index] = false;
156 }
157}
158
159/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
160 switch (inst.GetOpcode()) {
161 case IR::Opcode::Identity:
162 case IR::Opcode::BitCastU16F16:
163 case IR::Opcode::BitCastU32F32:
164 case IR::Opcode::BitCastU64F64:
165 case IR::Opcode::BitCastF16U16:
166 case IR::Opcode::BitCastF32U32:
167 case IR::Opcode::BitCastF64U64:
168 return true;
169 default:
170 return false;
171 }
172}
173
174/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) {
175 IR::Inst* it{&inst};
176 while (IsAliased(*it)) {
177 const IR::Value arg{it->Arg(0)};
178 if (arg.IsImmediate()) {
179 break;
180 }
181 it = arg.InstRecursive();
182 }
183 return *it;
184}
185
186} // namespace Shader::Backend::GLASM
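Editor's note: RegAlloc ties register lifetime to IR use counts. Define allocates (or null-defines) a register for an instruction's result, and Consume drops one use of an operand, freeing its register once the last use is gone. A minimal sketch of an emitter using it directly, in the same style as EmitPhiMove above (the function name and opcode are hypothetical, not part of the backend):

void EmitIllustrativeMove(EmitContext& ctx, IR::Inst& inst, const IR::Value& operand) {
    const Register dst{ctx.reg_alloc.Define(inst)};   // result register for `inst`
    const Value src{ctx.reg_alloc.Consume(operand)};  // the last Consume of `operand` frees its register
    ctx.Add("MOV.U {}.x,{};", dst, ScalarU32{src});
}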
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
new file mode 100644
index 000000000..82aec66c6
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -0,0 +1,303 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8
9#include <fmt/format.h>
10
11#include "common/bit_cast.h"
12#include "common/bit_field.h"
13#include "common/common_types.h"
14#include "shader_recompiler/exception.h"
15
16namespace Shader::IR {
17class Inst;
18class Value;
19} // namespace Shader::IR
20
21namespace Shader::Backend::GLASM {
22
23class EmitContext;
24
25enum class Type : u32 {
26 Void,
27 Register,
28 U32,
29 U64,
30};
31
32struct Id {
33 union {
34 u32 raw;
35 BitField<0, 1, u32> is_valid;
36 BitField<1, 1, u32> is_long;
37 BitField<2, 1, u32> is_spill;
38 BitField<3, 1, u32> is_condition_code;
39 BitField<4, 1, u32> is_null;
40 BitField<5, 27, u32> index;
41 };
42
43 bool operator==(Id rhs) const noexcept {
44 return raw == rhs.raw;
45 }
46 bool operator!=(Id rhs) const noexcept {
47 return !operator==(rhs);
48 }
49};
50static_assert(sizeof(Id) == sizeof(u32));
51
52struct Value {
53 Type type;
54 union {
55 Id id;
56 u32 imm_u32;
57 u64 imm_u64;
58 };
59
60 bool operator==(const Value& rhs) const noexcept {
61 if (type != rhs.type) {
62 return false;
63 }
64 switch (type) {
65 case Type::Void:
66 return true;
67 case Type::Register:
68 return id == rhs.id;
69 case Type::U32:
70 return imm_u32 == rhs.imm_u32;
71 case Type::U64:
72 return imm_u64 == rhs.imm_u64;
73 }
74 return false;
75 }
76 bool operator!=(const Value& rhs) const noexcept {
77 return !operator==(rhs);
78 }
79};
80struct Register : Value {};
81struct ScalarRegister : Value {};
82struct ScalarU32 : Value {};
83struct ScalarS32 : Value {};
84struct ScalarF32 : Value {};
85struct ScalarF64 : Value {};
86
87class RegAlloc {
88public:
89 RegAlloc() = default;
90
91 Register Define(IR::Inst& inst);
92
93 Register LongDefine(IR::Inst& inst);
94
95 [[nodiscard]] Value Peek(const IR::Value& value);
96
97 Value Consume(const IR::Value& value);
98
99 void Unref(IR::Inst& inst);
100
101 [[nodiscard]] Register AllocReg();
102
103 [[nodiscard]] Register AllocLongReg();
104
105 void FreeReg(Register reg);
106
107 void InvalidateConditionCodes() {
108 // This does nothing for now
109 }
110
111 [[nodiscard]] size_t NumUsedRegisters() const noexcept {
112 return num_used_registers;
113 }
114
115 [[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
116 return num_used_long_registers;
117 }
118
119 [[nodiscard]] bool IsEmpty() const noexcept {
120 return register_use.none() && long_register_use.none();
121 }
122
123 /// Returns true if the instruction is expected to be aliased to another
124 static bool IsAliased(const IR::Inst& inst);
125
126 /// Returns the underlying value out of an alias sequence
127 static IR::Inst& AliasInst(IR::Inst& inst);
128
129private:
130 static constexpr size_t NUM_REGS = 4096;
131 static constexpr size_t NUM_ELEMENTS = 4;
132
133 Value MakeImm(const IR::Value& value);
134
135 Register Define(IR::Inst& inst, bool is_long);
136
137 Value PeekInst(IR::Inst& inst);
138
139 Value ConsumeInst(IR::Inst& inst);
140
141 Id Alloc(bool is_long);
142
143 void Free(Id id);
144
145 size_t num_used_registers{};
146 size_t num_used_long_registers{};
147 std::bitset<NUM_REGS> register_use{};
148 std::bitset<NUM_REGS> long_register_use{};
149};
150
151template <bool scalar, typename FormatContext>
152auto FormatTo(FormatContext& ctx, Id id) {
153 if (id.is_condition_code != 0) {
154 throw NotImplementedException("Condition code emission");
155 }
156 if (id.is_spill != 0) {
157 throw NotImplementedException("Spill emission");
158 }
159 if constexpr (scalar) {
160 if (id.is_null != 0) {
161 return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
162 }
163 if (id.is_long != 0) {
164 return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
165 } else {
166 return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
167 }
168 } else {
169 if (id.is_null != 0) {
170 return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
171 }
172 if (id.is_long != 0) {
173 return fmt::format_to(ctx.out(), "D{}", id.index.Value());
174 } else {
175 return fmt::format_to(ctx.out(), "R{}", id.index.Value());
176 }
177 }
178}
179
180} // namespace Shader::Backend::GLASM
181
182template <>
183struct fmt::formatter<Shader::Backend::GLASM::Id> {
184 constexpr auto parse(format_parse_context& ctx) {
185 return ctx.begin();
186 }
187 template <typename FormatContext>
188 auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) {
189 return Shader::Backend::GLASM::FormatTo<true>(ctx, id);
190 }
191};
192
193template <>
194struct fmt::formatter<Shader::Backend::GLASM::Register> {
195 constexpr auto parse(format_parse_context& ctx) {
196 return ctx.begin();
197 }
198 template <typename FormatContext>
199 auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) {
200 if (value.type != Shader::Backend::GLASM::Type::Register) {
201 throw Shader::InvalidArgument("Register value type is not register");
202 }
203 return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id);
204 }
205};
206
207template <>
208struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> {
209 constexpr auto parse(format_parse_context& ctx) {
210 return ctx.begin();
211 }
212 template <typename FormatContext>
213 auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) {
214 if (value.type != Shader::Backend::GLASM::Type::Register) {
215 throw Shader::InvalidArgument("Register value type is not register");
216 }
217 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
218 }
219};
220
221template <>
222struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
223 constexpr auto parse(format_parse_context& ctx) {
224 return ctx.begin();
225 }
226 template <typename FormatContext>
227 auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) {
228 switch (value.type) {
229 case Shader::Backend::GLASM::Type::Void:
230 break;
231 case Shader::Backend::GLASM::Type::Register:
232 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
233 case Shader::Backend::GLASM::Type::U32:
234 return fmt::format_to(ctx.out(), "{}", value.imm_u32);
235 case Shader::Backend::GLASM::Type::U64:
236 break;
237 }
238 throw Shader::InvalidArgument("Invalid value type {}", value.type);
239 }
240};
241
242template <>
243struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
244 constexpr auto parse(format_parse_context& ctx) {
245 return ctx.begin();
246 }
247 template <typename FormatContext>
248 auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) {
249 switch (value.type) {
250 case Shader::Backend::GLASM::Type::Void:
251 break;
252 case Shader::Backend::GLASM::Type::Register:
253 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
254 case Shader::Backend::GLASM::Type::U32:
255 return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32));
256 case Shader::Backend::GLASM::Type::U64:
257 break;
258 }
259 throw Shader::InvalidArgument("Invalid value type {}", value.type);
260 }
261};
262
263template <>
264struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
265 constexpr auto parse(format_parse_context& ctx) {
266 return ctx.begin();
267 }
268 template <typename FormatContext>
269 auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) {
270 switch (value.type) {
271 case Shader::Backend::GLASM::Type::Void:
272 break;
273 case Shader::Backend::GLASM::Type::Register:
274 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
275 case Shader::Backend::GLASM::Type::U32:
276 return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
277 case Shader::Backend::GLASM::Type::U64:
278 break;
279 }
280 throw Shader::InvalidArgument("Invalid value type {}", value.type);
281 }
282};
283
284template <>
285struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
286 constexpr auto parse(format_parse_context& ctx) {
287 return ctx.begin();
288 }
289 template <typename FormatContext>
290 auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
291 switch (value.type) {
292 case Shader::Backend::GLASM::Type::Void:
293 break;
294 case Shader::Backend::GLASM::Type::Register:
295 return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
296 case Shader::Backend::GLASM::Type::U32:
297 break;
298 case Shader::Backend::GLASM::Type::U64:
299 return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
300 }
301 throw Shader::InvalidArgument("Invalid value type {}", value.type);
302 }
303};
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
new file mode 100644
index 000000000..4e6f2c0fe
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -0,0 +1,715 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/bindings.h"
6#include "shader_recompiler/backend/glsl/emit_context.h"
7#include "shader_recompiler/frontend/ir/program.h"
8#include "shader_recompiler/profile.h"
9#include "shader_recompiler/runtime_info.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13u32 CbufIndex(size_t offset) {
14 return (offset / 4) % 4;
15}
16
17char Swizzle(size_t offset) {
18 return "xyzw"[CbufIndex(offset)];
19}
20
21std::string_view InterpDecorator(Interpolation interp) {
22 switch (interp) {
23 case Interpolation::Smooth:
24 return "";
25 case Interpolation::Flat:
26 return "flat ";
27 case Interpolation::NoPerspective:
28 return "noperspective ";
29 }
30 throw InvalidArgument("Invalid interpolation {}", interp);
31}
32
33std::string_view InputArrayDecorator(Stage stage) {
34 switch (stage) {
35 case Stage::Geometry:
36 case Stage::TessellationControl:
37 case Stage::TessellationEval:
38 return "[]";
39 default:
40 return "";
41 }
42}
43
44bool StoresPerVertexAttributes(Stage stage) {
45 switch (stage) {
46 case Stage::VertexA:
47 case Stage::VertexB:
48 case Stage::Geometry:
49 case Stage::TessellationEval:
50 return true;
51 default:
52 return false;
53 }
54}
55
56std::string OutputDecorator(Stage stage, u32 size) {
57 switch (stage) {
58 case Stage::TessellationControl:
59 return fmt::format("[{}]", size);
60 default:
61 return "";
62 }
63}
64
65std::string_view SamplerType(TextureType type, bool is_depth) {
66 if (is_depth) {
67 switch (type) {
68 case TextureType::Color1D:
69 return "sampler1DShadow";
70 case TextureType::ColorArray1D:
71 return "sampler1DArrayShadow";
72 case TextureType::Color2D:
73 return "sampler2DShadow";
74 case TextureType::ColorArray2D:
75 return "sampler2DArrayShadow";
76 case TextureType::ColorCube:
77 return "samplerCubeShadow";
78 case TextureType::ColorArrayCube:
79 return "samplerCubeArrayShadow";
80 default:
81 throw NotImplementedException("Texture type: {}", type);
82 }
83 }
84 switch (type) {
85 case TextureType::Color1D:
86 return "sampler1D";
87 case TextureType::ColorArray1D:
88 return "sampler1DArray";
89 case TextureType::Color2D:
90 return "sampler2D";
91 case TextureType::ColorArray2D:
92 return "sampler2DArray";
93 case TextureType::Color3D:
94 return "sampler3D";
95 case TextureType::ColorCube:
96 return "samplerCube";
97 case TextureType::ColorArrayCube:
98 return "samplerCubeArray";
99 case TextureType::Buffer:
100 return "samplerBuffer";
101 default:
102 throw NotImplementedException("Texture type: {}", type);
103 }
104}
105
106std::string_view ImageType(TextureType type) {
107 switch (type) {
108 case TextureType::Color1D:
109 return "uimage1D";
110 case TextureType::ColorArray1D:
111 return "uimage1DArray";
112 case TextureType::Color2D:
113 return "uimage2D";
114 case TextureType::ColorArray2D:
115 return "uimage2DArray";
116 case TextureType::Color3D:
117 return "uimage3D";
118 case TextureType::ColorCube:
119 return "uimageCube";
120 case TextureType::ColorArrayCube:
121 return "uimageCubeArray";
122 case TextureType::Buffer:
123 return "uimageBuffer";
124 default:
125 throw NotImplementedException("Image type: {}", type);
126 }
127}
128
129std::string_view ImageFormatString(ImageFormat format) {
130 switch (format) {
131 case ImageFormat::Typeless:
132 return "";
133 case ImageFormat::R8_UINT:
134 return ",r8ui";
135 case ImageFormat::R8_SINT:
136 return ",r8i";
137 case ImageFormat::R16_UINT:
138 return ",r16ui";
139 case ImageFormat::R16_SINT:
140 return ",r16i";
141 case ImageFormat::R32_UINT:
142 return ",r32ui";
143 case ImageFormat::R32G32_UINT:
144 return ",rg32ui";
145 case ImageFormat::R32G32B32A32_UINT:
146 return ",rgba32ui";
147 default:
148 throw NotImplementedException("Image format: {}", format);
149 }
150}
151
152std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
153 if (is_written && !is_read) {
154 return "writeonly ";
155 }
156 if (is_read && !is_written) {
157 return "readonly ";
158 }
159 return "";
160}
161
162std::string_view GetTessMode(TessPrimitive primitive) {
163 switch (primitive) {
164 case TessPrimitive::Triangles:
165 return "triangles";
166 case TessPrimitive::Quads:
167 return "quads";
168 case TessPrimitive::Isolines:
169 return "isolines";
170 }
171 throw InvalidArgument("Invalid tessellation primitive {}", primitive);
172}
173
174std::string_view GetTessSpacing(TessSpacing spacing) {
175 switch (spacing) {
176 case TessSpacing::Equal:
177 return "equal_spacing";
178 case TessSpacing::FractionalOdd:
179 return "fractional_odd_spacing";
180 case TessSpacing::FractionalEven:
181 return "fractional_even_spacing";
182 }
183 throw InvalidArgument("Invalid tessellation spacing {}", spacing);
184}
185
186std::string_view InputPrimitive(InputTopology topology) {
187 switch (topology) {
188 case InputTopology::Points:
189 return "points";
190 case InputTopology::Lines:
191 return "lines";
192 case InputTopology::LinesAdjacency:
193 return "lines_adjacency";
194 case InputTopology::Triangles:
195 return "triangles";
196 case InputTopology::TrianglesAdjacency:
197 return "triangles_adjacency";
198 }
199 throw InvalidArgument("Invalid input topology {}", topology);
200}
201
202std::string_view OutputPrimitive(OutputTopology topology) {
203 switch (topology) {
204 case OutputTopology::PointList:
205 return "points";
206 case OutputTopology::LineStrip:
207 return "line_strip";
208 case OutputTopology::TriangleStrip:
209 return "triangle_strip";
210 }
211 throw InvalidArgument("Invalid output topology {}", topology);
212}
213
214void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) {
215 if (!ctx.info.stores.Legacy()) {
216 return;
217 }
218 if (ctx.info.stores.FixedFunctionTexture()) {
219 header += "vec4 gl_TexCoord[8];";
220 }
221 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
222 header += "vec4 gl_FrontColor;";
223 }
224 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) {
225 header += "vec4 gl_FrontSecondaryColor;";
226 }
227 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) {
228 header += "vec4 gl_BackColor;";
229 }
230 if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) {
231 header += "vec4 gl_BackSecondaryColor;";
232 }
233}
234
235void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
236 if (!StoresPerVertexAttributes(ctx.stage)) {
237 return;
238 }
239 if (ctx.uses_geometry_passthrough) {
240 return;
241 }
242 header += "out gl_PerVertex{vec4 gl_Position;";
243 if (ctx.info.stores[IR::Attribute::PointSize]) {
244 header += "float gl_PointSize;";
245 }
246 if (ctx.info.stores.ClipDistances()) {
247 header += "float gl_ClipDistance[];";
248 }
249 if (ctx.info.stores[IR::Attribute::ViewportIndex] &&
250 ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
251 header += "int gl_ViewportIndex;";
252 }
253 SetupLegacyOutPerVertex(ctx, header);
254 header += "};";
255 if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) {
256 header += "out int gl_ViewportIndex;";
257 }
258}
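Editor's note: as an example of the string SetupOutPerVertex builds, a vertex stage that stores position and point size, with no clip distances, viewport index or legacy attributes (illustrative), ends up with:

// out gl_PerVertex{vec4 gl_Position;float gl_PointSize;};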
259
260void SetupInPerVertex(EmitContext& ctx, std::string& header) {
261 // Currently only required for TessellationControl to adhere to
262 // ARB_separate_shader_objects requirements
263 if (ctx.stage != Stage::TessellationControl) {
264 return;
265 }
266 const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)};
267 const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]};
268 const bool loads_clip_distance{ctx.info.loads.ClipDistances()};
269 const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance};
270 if (!loads_per_vertex) {
271 return;
272 }
273 header += "in gl_PerVertex{";
274 if (loads_position) {
275 header += "vec4 gl_Position;";
276 }
277 if (loads_point_size) {
278 header += "float gl_PointSize;";
279 }
280 if (loads_clip_distance) {
281 header += "float gl_ClipDistance[];";
282 }
283 header += "}gl_in[gl_MaxPatchVertices];";
284}
285
286void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
287 if (!ctx.info.loads.Legacy()) {
288 return;
289 }
290 header += "in gl_PerFragment{";
291 if (ctx.info.loads.FixedFunctionTexture()) {
292 header += "vec4 gl_TexCoord[8];";
293 }
294 if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
295 header += "vec4 gl_Color;";
296 }
297 header += "};";
298}
299
300} // Anonymous namespace
301
302EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
303 const RuntimeInfo& runtime_info_)
304 : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage},
305 uses_geometry_passthrough{program.is_geometry_passthrough &&
306 profile.support_geometry_shader_passthrough} {
307 if (profile.need_fastmath_off) {
308 header += "#pragma optionNV(fastmath off)\n";
309 }
310 SetupExtensions();
311 switch (program.stage) {
312 case Stage::VertexA:
313 case Stage::VertexB:
314 stage_name = "vs";
315 break;
316 case Stage::TessellationControl:
317 stage_name = "tcs";
318 header += fmt::format("layout(vertices={})out;", program.invocations);
319 break;
320 case Stage::TessellationEval:
321 stage_name = "tes";
322 header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
323 GetTessSpacing(runtime_info.tess_spacing),
324 runtime_info.tess_clockwise ? "cw" : "ccw");
325 break;
326 case Stage::Geometry:
327 stage_name = "gs";
328 header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology));
329 if (uses_geometry_passthrough) {
330 header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};";
331 break;
332 } else if (program.is_geometry_passthrough &&
333 !profile.support_geometry_shader_passthrough) {
334 LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported");
335 }
336 header += fmt::format(
337 "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];",
338 OutputPrimitive(program.output_topology), program.output_vertices);
339 break;
340 case Stage::Fragment:
341 stage_name = "fs";
342 position_name = "gl_FragCoord";
343 if (runtime_info.force_early_z) {
344 header += "layout(early_fragment_tests)in;";
345 }
346 if (info.uses_sample_id) {
347 header += "in int gl_SampleID;";
348 }
349 if (info.stores_sample_mask) {
350 header += "out int gl_SampleMask[];";
351 }
352 break;
353 case Stage::Compute:
354 stage_name = "cs";
355 const u32 local_x{std::max(program.workgroup_size[0], 1u)};
356 const u32 local_y{std::max(program.workgroup_size[1], 1u)};
357 const u32 local_z{std::max(program.workgroup_size[2], 1u)};
358 header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;",
359 local_x, local_y, local_z);
360 break;
361 }
362 SetupOutPerVertex(*this, header);
363 SetupInPerVertex(*this, header);
364 SetupLegacyInPerFragment(*this, header);
365
366 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
367 if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) {
368 continue;
369 }
370 const auto qualifier{uses_geometry_passthrough ? "passthrough"
371 : fmt::format("location={}", index)};
372 header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier,
373 InterpDecorator(info.interpolation[index]), index,
374 InputArrayDecorator(stage));
375 }
376 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
377 if (!info.uses_patches[index]) {
378 continue;
379 }
380 const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"};
381 header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index);
382 }
383 if (stage == Stage::Fragment) {
384 for (size_t index = 0; index < info.stores_frag_color.size(); ++index) {
385 if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
386 continue;
387 }
388 header += fmt::format("layout(location={})out vec4 frag_color{};", index, index);
389 }
390 }
391 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
392 if (info.stores.Generic(index)) {
393 DefineGenericOutput(index, program.invocations);
394 }
395 }
396 DefineConstantBuffers(bindings);
397 DefineStorageBuffers(bindings);
398 SetupImages(bindings);
399 SetupTextures(bindings);
400 DefineHelperFunctions();
401 DefineConstants();
402}
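Editor's note: to make the header building above concrete, a compute program with an (illustrative) workgroup size of 8x8x1 gets the following appended by the Stage::Compute branch:

// layout(local_size_x=8,local_size_y=8,local_size_z=1) in;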
403
404void EmitContext::SetupExtensions() {
405 header += "#extension GL_ARB_separate_shader_objects : enable\n";
406 if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
407 header += "#extension GL_EXT_texture_shadow_lod : enable\n";
408 }
409 if (info.uses_int64 && profile.support_int64) {
410 header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
411 }
412 if (info.uses_int64_bit_atomics) {
413 header += "#extension GL_NV_shader_atomic_int64 : enable\n";
414 }
415 if (info.uses_atomic_f32_add) {
416 header += "#extension GL_NV_shader_atomic_float : enable\n";
417 }
418 if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
419 header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
420 }
421 if (info.uses_fp16) {
422 if (profile.support_gl_nv_gpu_shader_5) {
423 header += "#extension GL_NV_gpu_shader5 : enable\n";
424 }
425 if (profile.support_gl_amd_gpu_shader_half_float) {
426 header += "#extension GL_AMD_gpu_shader_half_float : enable\n";
427 }
428 }
429 if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
430 info.uses_subgroup_shuffles || info.uses_fswzadd) {
431 header += "#extension GL_ARB_shader_ballot : enable\n"
432 "#extension GL_ARB_shader_group_vote : enable\n";
433 if (!info.uses_int64 && profile.support_int64) {
434 header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
435 }
436 if (profile.support_gl_warp_intrinsics) {
437 header += "#extension GL_NV_shader_thread_shuffle : enable\n";
438 }
439 }
440 if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) &&
441 profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) {
442 header += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
443 }
444 if (info.uses_sparse_residency && profile.support_gl_sparse_textures) {
445 header += "#extension GL_ARB_sparse_texture2 : enable\n";
446 }
447 if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
448 header += "#extension GL_NV_viewport_array2 : enable\n";
449 }
450 if (info.uses_typeless_image_reads) {
451 header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
452 }
453 if (info.uses_derivatives && profile.support_gl_derivative_control) {
454 header += "#extension GL_ARB_derivative_control : enable\n";
455 }
456 if (uses_geometry_passthrough) {
457 header += "#extension GL_NV_geometry_shader_passthrough : enable\n";
458 }
459}
460
461void EmitContext::DefineConstantBuffers(Bindings& bindings) {
462 if (info.constant_buffer_descriptors.empty()) {
463 return;
464 }
465 for (const auto& desc : info.constant_buffer_descriptors) {
466 header += fmt::format(
467 "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
468 bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
469 bindings.uniform_buffer += desc.count;
470 }
471}
472
473void EmitContext::DefineStorageBuffers(Bindings& bindings) {
474 if (info.storage_buffers_descriptors.empty()) {
475 return;
476 }
477 u32 index{};
478 for (const auto& desc : info.storage_buffers_descriptors) {
479 header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};",
480 bindings.storage_buffer, stage_name, bindings.storage_buffer,
481 stage_name, index);
482 bindings.storage_buffer += desc.count;
483 index += desc.count;
484 }
485}
486
487void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
488 static constexpr std::string_view swizzle{"xyzw"};
489 const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
490 u32 element{0};
491 while (element < 4) {
492 std::string definition{fmt::format("layout(location={}", index)};
493 const u32 remainder{4 - element};
494 const TransformFeedbackVarying* xfb_varying{};
495 if (!runtime_info.xfb_varyings.empty()) {
496 xfb_varying = &runtime_info.xfb_varyings[base_index + element];
497 xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
498 }
499 const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
500 if (element > 0) {
501 definition += fmt::format(",component={}", element);
502 }
503 if (xfb_varying) {
504 definition +=
505 fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
506 xfb_varying->stride, xfb_varying->offset);
507 }
508 std::string name{fmt::format("out_attr{}", index)};
509 if (num_components < 4 || element > 0) {
510 name += fmt::format("_{}", swizzle.substr(element, num_components));
511 }
512 const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
513 definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
514 header += definition;
515
516 const GenericElementInfo element_info{
517 .name = name,
518 .first_element = element,
519 .num_components = num_components,
520 };
521 std::fill_n(output_generics[index].begin() + element, num_components, element_info);
522 element += num_components;
523 }
524}
525
526void EmitContext::DefineHelperFunctions() {
527 header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
528 "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
529 if (info.uses_global_increment || info.uses_shared_increment) {
530 header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
531 }
532 if (info.uses_global_decrement || info.uses_shared_decrement) {
533 header += "uint CasDecrement(uint op_a,uint op_b){"
534 "return op_a==0||op_a>op_b?op_b:(op_a-1u);}";
535 }
536 if (info.uses_atomic_f32_add) {
537 header += "uint CasFloatAdd(uint op_a,float op_b){"
538 "return ftou(utof(op_a)+op_b);}";
539 }
540 if (info.uses_atomic_f32x2_add) {
541 header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
542 "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}";
543 }
544 if (info.uses_atomic_f32x2_min) {
545 header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return "
546 "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}";
547 }
548 if (info.uses_atomic_f32x2_max) {
549 header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return "
550 "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}";
551 }
552 if (info.uses_atomic_f16x2_add) {
553 header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return "
554 "packFloat2x16(unpackFloat2x16(op_a)+op_b);}";
555 }
556 if (info.uses_atomic_f16x2_min) {
557 header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return "
558 "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}";
559 }
560 if (info.uses_atomic_f16x2_max) {
561 header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return "
562 "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}";
563 }
564 if (info.uses_atomic_s32_min) {
565 header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
566 }
567 if (info.uses_atomic_s32_max) {
568 header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
569 }
570 if (info.uses_global_memory && profile.support_int64) {
571 header += DefineGlobalMemoryFunctions();
572 }
573 if (info.loads_indexed_attributes) {
574 const bool is_array{stage == Stage::Geometry};
575 const auto vertex_arg{is_array ? ",uint vertex" : ""};
576 std::string func{
577 fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint "
578 "masked_index=uint(base_index)&3u;switch(base_index>>2){{",
579 vertex_arg)};
580 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
581 const auto position_idx{is_array ? "gl_in[vertex]." : ""};
582 func += fmt::format("case {}:return {}{}[masked_index];",
583 static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx,
584 position_name);
585 }
586 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
587 for (u32 index = 0; index < IR::NUM_GENERICS; ++index) {
588 if (!info.loads.Generic(index)) {
589 continue;
590 }
591 const auto vertex_idx{is_array ? "[vertex]" : ""};
592 func += fmt::format("case {}:return in_attr{}{}[masked_index];",
593 base_attribute_value + index, index, vertex_idx);
594 }
595 func += "default: return 0.0;}}";
596 header += func;
597 }
598 if (info.stores_indexed_attributes) {
599 // TODO
600 }
601}
602
603std::string EmitContext::DefineGlobalMemoryFunctions() {
604 const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) {
605 const auto& ssbo{info.storage_buffers_descriptors[index]};
606 const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
607 const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
608 const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)};
609 std::array<std::string, 2> addr_xy;
610 std::array<std::string, 2> size_xy;
611 for (size_t i = 0; i < addr_xy.size(); ++i) {
612 const auto addr_loc{ssbo.cbuf_offset + 4 * i};
613 const auto size_loc{size_cbuf_offset + 4 * i};
614 addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
615 size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
616 }
617 const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
618        const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
619        func += addr_statement;
620
621 const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
622 const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
623 const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)};
624 const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)};
625 func += comparison;
626
627 const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
628 func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr);
629 }};
630 std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"};
631 std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"};
632 std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"};
633 std::string load_func{"uint LoadGlobal32(uint64_t addr){"};
634 std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"};
635 std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"};
636 const size_t num_buffers{info.storage_buffers_descriptors.size()};
637 for (size_t index = 0; index < num_buffers; ++index) {
638 if (!info.nvn_buffer_used[index]) {
639 continue;
640 }
641 define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}");
642 define_body(write_func_64, index,
643 "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}");
644 define_body(write_func_128, index,
645 "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint("
646 "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}");
647 define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}");
648 define_body(load_func_64, index,
649 "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}");
650 define_body(load_func_128, index,
651 "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
652 "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
653 }
654 write_func += '}';
655 write_func_64 += '}';
656 write_func_128 += '}';
657 load_func += "return 0u;}";
658 load_func_64 += "return uvec2(0);}";
659 load_func_128 += "return uvec4(0);}";
660 return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128;
661}
662
663void EmitContext::SetupImages(Bindings& bindings) {
664 image_buffers.reserve(info.image_buffer_descriptors.size());
665 for (const auto& desc : info.image_buffer_descriptors) {
666 image_buffers.push_back({bindings.image, desc.count});
667 const auto format{ImageFormatString(desc.format)};
668 const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
669 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
670 header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
671 bindings.image, format, qualifier, bindings.image, array_decorator);
672 bindings.image += desc.count;
673 }
674 images.reserve(info.image_descriptors.size());
675 for (const auto& desc : info.image_descriptors) {
676 images.push_back({bindings.image, desc.count});
677 const auto format{ImageFormatString(desc.format)};
678 const auto image_type{ImageType(desc.type)};
679 const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
680 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
681 header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
682 qualifier, image_type, bindings.image, array_decorator);
683 bindings.image += desc.count;
684 }
685}
686
687void EmitContext::SetupTextures(Bindings& bindings) {
688 texture_buffers.reserve(info.texture_buffer_descriptors.size());
689 for (const auto& desc : info.texture_buffer_descriptors) {
690 texture_buffers.push_back({bindings.texture, desc.count});
691 const auto sampler_type{SamplerType(TextureType::Buffer, false)};
692 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
693 header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
694 sampler_type, bindings.texture, array_decorator);
695 bindings.texture += desc.count;
696 }
697 textures.reserve(info.texture_descriptors.size());
698 for (const auto& desc : info.texture_descriptors) {
699 textures.push_back({bindings.texture, desc.count});
700 const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
701 const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
702 header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
703 sampler_type, bindings.texture, array_decorator);
704 bindings.texture += desc.count;
705 }
706}
707
708void EmitContext::DefineConstants() {
709 if (info.uses_fswzadd) {
710 header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);"
711 "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);";
712 }
713}
714
715} // namespace Shader::Backend::GLSL
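Note: the LoadGlobal*/WriteGlobal* helpers built above resolve a 64-bit guest address by range-checking it against every NVN storage buffer window whose base and size are read back from the constant buffer. A minimal host-side C++ model of the generated LoadGlobal32 logic follows; the SsboWindow struct and function names are illustrative only and are not part of the backend.

#include <cstdint>
#include <span>

struct SsboWindow {
    uint64_t base;            // packUint2x32 of the two cbuf address words
    uint64_t size;            // read from cbuf_offset + 8
    std::span<uint32_t> data; // the *_ssbo uint array
};

uint32_t LoadGlobal32(std::span<const SsboWindow> buffers, uint64_t addr) {
    for (const SsboWindow& ssbo : buffers) {
        if (addr >= ssbo.base && addr < ssbo.base + ssbo.size) {
            return ssbo.data[static_cast<size_t>(addr - ssbo.base) >> 2];
        }
    }
    return 0u; // mirrors the emitted fallback "return 0u;"
}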
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
new file mode 100644
index 000000000..d9b639d29
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -0,0 +1,174 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <utility>
9#include <vector>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/backend/glsl/var_alloc.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader {
17struct Info;
18struct Profile;
19struct RuntimeInfo;
20} // namespace Shader
21
22namespace Shader::Backend {
23struct Bindings;
24}
25
26namespace Shader::IR {
27class Inst;
28struct Program;
29} // namespace Shader::IR
30
31namespace Shader::Backend::GLSL {
32
33struct GenericElementInfo {
34 std::string name;
35 u32 first_element{};
36 u32 num_components{};
37};
38
39struct TextureImageDefinition {
40 u32 binding;
41 u32 count;
42};
43
44class EmitContext {
45public:
46 explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
47 const RuntimeInfo& runtime_info_);
48
49 template <GlslVarType type, typename... Args>
50 void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
51 const auto var_def{var_alloc.AddDefine(inst, type)};
52 if (var_def.empty()) {
53            // Skip the assignment.
54 code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...);
55 } else {
56 code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...);
57 }
58 // TODO: Remove this
59 code += '\n';
60 }
61
62 template <typename... Args>
63 void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) {
64 Add<GlslVarType::U1>(format_str, inst, args...);
65 }
66
67 template <typename... Args>
68 void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) {
69 Add<GlslVarType::F16x2>(format_str, inst, args...);
70 }
71
72 template <typename... Args>
73 void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) {
74 Add<GlslVarType::U32>(format_str, inst, args...);
75 }
76
77 template <typename... Args>
78 void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) {
79 Add<GlslVarType::F32>(format_str, inst, args...);
80 }
81
82 template <typename... Args>
83 void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) {
84 Add<GlslVarType::U64>(format_str, inst, args...);
85 }
86
87 template <typename... Args>
88 void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) {
89 Add<GlslVarType::F64>(format_str, inst, args...);
90 }
91
92 template <typename... Args>
93 void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
94 Add<GlslVarType::U32x2>(format_str, inst, args...);
95 }
96
97 template <typename... Args>
98 void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
99 Add<GlslVarType::F32x2>(format_str, inst, args...);
100 }
101
102 template <typename... Args>
103 void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
104 Add<GlslVarType::U32x3>(format_str, inst, args...);
105 }
106
107 template <typename... Args>
108 void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
109 Add<GlslVarType::F32x3>(format_str, inst, args...);
110 }
111
112 template <typename... Args>
113 void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
114 Add<GlslVarType::U32x4>(format_str, inst, args...);
115 }
116
117 template <typename... Args>
118 void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
119 Add<GlslVarType::F32x4>(format_str, inst, args...);
120 }
121
122 template <typename... Args>
123 void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) {
124 Add<GlslVarType::PrecF32>(format_str, inst, args...);
125 }
126
127 template <typename... Args>
128 void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) {
129 Add<GlslVarType::PrecF64>(format_str, inst, args...);
130 }
131
132 template <typename... Args>
133 void Add(const char* format_str, Args&&... args) {
134 code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
135 // TODO: Remove this
136 code += '\n';
137 }
138
139 std::string header;
140 std::string code;
141 VarAlloc var_alloc;
142 const Info& info;
143 const Profile& profile;
144 const RuntimeInfo& runtime_info;
145
146 Stage stage{};
147 std::string_view stage_name = "invalid";
148 std::string_view position_name = "gl_Position";
149
150 std::vector<TextureImageDefinition> texture_buffers;
151 std::vector<TextureImageDefinition> image_buffers;
152 std::vector<TextureImageDefinition> textures;
153 std::vector<TextureImageDefinition> images;
154 std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
155
156 u32 num_safety_loop_vars{};
157
158 bool uses_y_direction{};
159 bool uses_cc_carry{};
160 bool uses_geometry_passthrough{};
161
162private:
163 void SetupExtensions();
164 void DefineConstantBuffers(Bindings& bindings);
165 void DefineStorageBuffers(Bindings& bindings);
166 void DefineGenericOutput(size_t index, u32 invocations);
167 void DefineHelperFunctions();
168 void DefineConstants();
169 std::string DefineGlobalMemoryFunctions();
170 void SetupImages(Bindings& bindings);
171 void SetupTextures(Bindings& bindings);
172};
173
174} // namespace Shader::Backend::GLSL
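The typed Add helpers above all assume that an instruction's format string starts with "{}=": when the variable allocator returns an empty definition (the value is never consumed), the emitter formats format_str + 3, dropping the destination and emitting only the expression statement. A self-contained sketch of that convention, using a made-up operand name, could look like this:

#include <fmt/format.h>
#include <string>

int main() {
    const char* format_str = "{}=ftou({});"; // same shape the emitters pass in
    const std::string var_def;               // empty: the result is never read
    std::string code;
    if (var_def.empty()) {
        code += fmt::format(fmt::runtime(format_str + 3), "in_attr0.x");
    } else {
        code += fmt::format(fmt::runtime(format_str), var_def, "in_attr0.x");
    }
    fmt::print("{}\n", code); // prints: ftou(in_attr0.x);
}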
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
new file mode 100644
index 000000000..8a430d573
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -0,0 +1,252 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8#include <type_traits>
9
10#include "common/div_ceil.h"
11#include "common/settings.h"
12#include "shader_recompiler/backend/glsl/emit_context.h"
13#include "shader_recompiler/backend/glsl/emit_glsl.h"
14#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
15#include "shader_recompiler/frontend/ir/ir_emitter.h"
16
17namespace Shader::Backend::GLSL {
18namespace {
19template <class Func>
20struct FuncTraits {};
21
22template <class ReturnType_, class... Args>
23struct FuncTraits<ReturnType_ (*)(Args...)> {
24 using ReturnType = ReturnType_;
25
26 static constexpr size_t NUM_ARGS = sizeof...(Args);
27
28 template <size_t I>
29 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
30};
31
32template <auto func, typename... Args>
33void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
34 inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
35}
36
37template <typename ArgType>
38auto Arg(EmitContext& ctx, const IR::Value& arg) {
39 if constexpr (std::is_same_v<ArgType, std::string_view>) {
40 return ctx.var_alloc.Consume(arg);
41 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
42 return arg;
43 } else if constexpr (std::is_same_v<ArgType, u32>) {
44 return arg.U32();
45 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
46 return arg.Attribute();
47 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
48 return arg.Patch();
49 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
50 return arg.Reg();
51 }
52}
53
54template <auto func, bool is_first_arg_inst, size_t... I>
55void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
56 using Traits = FuncTraits<decltype(func)>;
57 if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
58 if constexpr (is_first_arg_inst) {
59 SetDefinition<func>(
60 ctx, inst, *inst,
61 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
62 } else {
63 SetDefinition<func>(
64 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
65 }
66 } else {
67 if constexpr (is_first_arg_inst) {
68 func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
69 } else {
70 func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
71 }
72 }
73}
74
75template <auto func>
76void Invoke(EmitContext& ctx, IR::Inst* inst) {
77 using Traits = FuncTraits<decltype(func)>;
78 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
79 if constexpr (Traits::NUM_ARGS == 1) {
80 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
81 } else {
82 using FirstArgType = typename Traits::template ArgType<1>;
83 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
84 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
85 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
86 }
87}
88
89void EmitInst(EmitContext& ctx, IR::Inst* inst) {
90 switch (inst->GetOpcode()) {
91#define OPCODE(name, result_type, ...) \
92 case IR::Opcode::name: \
93 return Invoke<&Emit##name>(ctx, inst);
94#include "shader_recompiler/frontend/ir/opcodes.inc"
95#undef OPCODE
96 }
97 throw LogicError("Invalid opcode {}", inst->GetOpcode());
98}
99
100bool IsReference(IR::Inst& inst) {
101 return inst.GetOpcode() == IR::Opcode::Reference;
102}
103
104void PrecolorInst(IR::Inst& phi) {
105    // Insert phi moves before references to avoid overwriting other phis
106 const size_t num_args{phi.NumArgs()};
107 for (size_t i = 0; i < num_args; ++i) {
108 IR::Block& phi_block{*phi.PhiBlock(i)};
109 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
110 IR::IREmitter ir{phi_block, it};
111 const IR::Value arg{phi.Arg(i)};
112 if (arg.IsImmediate()) {
113 ir.PhiMove(phi, arg);
114 } else {
115 ir.PhiMove(phi, IR::Value{arg.InstRecursive()});
116 }
117 }
118 for (size_t i = 0; i < num_args; ++i) {
119 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
120 }
121}
122
123void Precolor(const IR::Program& program) {
124 for (IR::Block* const block : program.blocks) {
125 for (IR::Inst& phi : block->Instructions()) {
126 if (!IR::IsPhi(phi)) {
127 break;
128 }
129 PrecolorInst(phi);
130 }
131 }
132}
133
134void EmitCode(EmitContext& ctx, const IR::Program& program) {
135 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
136 switch (node.type) {
137 case IR::AbstractSyntaxNode::Type::Block:
138 for (IR::Inst& inst : node.data.block->Instructions()) {
139 EmitInst(ctx, &inst);
140 }
141 break;
142 case IR::AbstractSyntaxNode::Type::If:
143 ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond));
144 break;
145 case IR::AbstractSyntaxNode::Type::EndIf:
146 ctx.Add("}}");
147 break;
148 case IR::AbstractSyntaxNode::Type::Break:
149 if (node.data.break_node.cond.IsImmediate()) {
150 if (node.data.break_node.cond.U1()) {
151 ctx.Add("break;");
152 }
153 } else {
154 ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond));
155 }
156 break;
157 case IR::AbstractSyntaxNode::Type::Return:
158 case IR::AbstractSyntaxNode::Type::Unreachable:
159 ctx.Add("return;");
160 break;
161 case IR::AbstractSyntaxNode::Type::Loop:
162 ctx.Add("for(;;){{");
163 break;
164 case IR::AbstractSyntaxNode::Type::Repeat:
165 if (Settings::values.disable_shader_loop_safety_checks) {
166 ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond));
167 } else {
168 ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++,
169 ctx.var_alloc.Consume(node.data.repeat.cond));
170 }
171 break;
172 default:
173 throw NotImplementedException("AbstractSyntaxNode Type {}", node.type);
174 }
175 }
176}
177
178std::string GlslVersionSpecifier(const EmitContext& ctx) {
179 if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) {
180 return " compatibility";
181 }
182 return "";
183}
184
185bool IsPreciseType(GlslVarType type) {
186 switch (type) {
187 case GlslVarType::PrecF32:
188 case GlslVarType::PrecF64:
189 return true;
190 default:
191 return false;
192 }
193}
194
195void DefineVariables(const EmitContext& ctx, std::string& header) {
196 for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) {
197 const auto type{static_cast<GlslVarType>(i)};
198 const auto& tracker{ctx.var_alloc.GetUseTracker(type)};
199 const auto type_name{ctx.var_alloc.GetGlslType(type)};
200 const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug};
201 const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""};
202 // Temps/return types that are never used are stored at index 0
203 if (tracker.uses_temp) {
204 header += fmt::format("{}{} t{}={}(0);", precise, type_name,
205 ctx.var_alloc.Representation(0, type), type_name);
206 }
207 for (u32 index = 0; index < tracker.num_used; ++index) {
208 header += fmt::format("{}{} {}={}(0);", precise, type_name,
209 ctx.var_alloc.Representation(index, type), type_name);
210 }
211 }
212 for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) {
213 header += fmt::format("int loop{}=0x2000;", i);
214 }
215}
216} // Anonymous namespace
217
218std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
219 Bindings& bindings) {
220 EmitContext ctx{program, bindings, profile, runtime_info};
221 Precolor(program);
222 EmitCode(ctx, program);
223 const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
224 ctx.header.insert(0, version);
225 if (program.shared_memory_size > 0) {
226 const auto requested_size{program.shared_memory_size};
227 const auto max_size{profile.gl_max_compute_smem_size};
228 const bool needs_clamp{requested_size > max_size};
229 if (needs_clamp) {
230 LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})",
231 requested_size, max_size);
232 }
233 const auto smem_size{needs_clamp ? max_size : requested_size};
234 ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U));
235 }
236 ctx.header += "void main(){\n";
237 if (program.local_memory_size > 0) {
238 ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U));
239 }
240 DefineVariables(ctx, ctx.header);
241 if (ctx.uses_cc_carry) {
242 ctx.header += "uint carry;";
243 }
244 if (program.info.uses_subgroup_shuffles) {
245 ctx.header += "bool shfl_in_bounds;";
246 }
247 ctx.code.insert(0, ctx.header);
248 ctx.code += '}';
249 return ctx.code;
250}
251
252} // namespace Shader::Backend::GLSL
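When shader loop safety checks are enabled, EmitGLSL pairs every Loop/Repeat node with a counter declared by DefineVariables ("int loopN=0x2000;") and a back edge of the form "if(--loopN<0 || !cond){break;}", so a loop the guest shader never exits still terminates. A tiny host-side model of that bound, assuming an always-true condition, is:

#include <cstdio>

int main() {
    int loop0 = 0x2000;      // as emitted by DefineVariables
    const bool cond = true;  // stand-in for the translated loop condition
    int iterations = 0;
    for (;;) {
        ++iterations;
        if (--loop0 < 0 || !cond) { // mirrors the Repeat node's back edge
            break;
        }
    }
    std::printf("%d\n", iterations); // 0x2000 + 1 = 8193 iterations, then bail
}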
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h
new file mode 100644
index 000000000..20e5719e6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.h
@@ -0,0 +1,24 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include "shader_recompiler/backend/bindings.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/profile.h"
12#include "shader_recompiler/runtime_info.h"
13
14namespace Shader::Backend::GLSL {
15
16[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info,
17 IR::Program& program, Bindings& bindings);
18
19[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) {
20 Bindings binding;
21 return EmitGLSL(profile, {}, program, binding);
22}
23
24} // namespace Shader::Backend::GLSL
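A hypothetical caller of the new entry point, to show the intended flow; producing the Profile, RuntimeInfo, IR::Program and Bindings objects is the video backend's job and is elided here, and the wrapper name below is illustrative rather than an actual call site.

#include <string>

#include "shader_recompiler/backend/glsl/emit_glsl.h"

std::string TranslateToGlsl(const Shader::Profile& profile,
                            const Shader::RuntimeInfo& runtime_info,
                            Shader::IR::Program& program,
                            Shader::Backend::Bindings& bindings) {
    // Returns a full "#version 450 ..." translation unit ready for glShaderSource.
    return Shader::Backend::GLSL::EmitGLSL(profile, runtime_info, program, bindings);
}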
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
new file mode 100644
index 000000000..772acc5a4
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -0,0 +1,418 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13constexpr char cas_loop[]{
14 "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"};
15
16void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
17 std::string_view value, std::string_view function) {
18 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
19 const std::string smem{fmt::format("smem[{}>>2]", offset)};
20 ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret);
21}
22
23void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
24 const IR::Value& offset, std::string_view value, std::string_view function) {
25 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
26 const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
27 ctx.var_alloc.Consume(offset))};
28 ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
29}
30
31void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
32 const IR::Value& offset, std::string_view value,
33 std::string_view function) {
34 const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
35 ctx.var_alloc.Consume(offset))};
36 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
37 ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
38 ctx.AddF32("{}=utof({});", inst, ret);
39}
40} // Anonymous namespace
41
42void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
43 std::string_view value) {
44 ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
45}
46
47void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
48 std::string_view value) {
49 const std::string u32_value{fmt::format("uint({})", value)};
50 SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32");
51}
52
53void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
54 std::string_view value) {
55 ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
56}
57
58void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
59 std::string_view value) {
60 const std::string u32_value{fmt::format("uint({})", value)};
61 SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32");
62}
63
64void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
65 std::string_view value) {
66 ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
67}
68
69void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
70 std::string_view value) {
71 SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement");
72}
73
74void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
75 std::string_view value) {
76 SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement");
77}
78
79void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
80 std::string_view value) {
81 ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
82}
83
84void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
85 std::string_view value) {
86 ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
87}
88
89void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
90 std::string_view value) {
91 ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
92}
93
94void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
95 std::string_view value) {
96 ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
97}
98
99void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
100 std::string_view value) {
101 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
102 ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
103 pointer_offset);
104 ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
105 pointer_offset, value, pointer_offset, value);
106}
107
108void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
109 const IR::Value& offset, std::string_view value) {
110 ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
111 ctx.var_alloc.Consume(offset), value);
112}
113
114void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
115 const IR::Value& offset, std::string_view value) {
116 const std::string u32_value{fmt::format("uint({})", value)};
117 SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32");
118}
119
120void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
121 const IR::Value& offset, std::string_view value) {
122 ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
123 ctx.var_alloc.Consume(offset), value);
124}
125
126void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
127 const IR::Value& offset, std::string_view value) {
128 const std::string u32_value{fmt::format("uint({})", value)};
129 SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32");
130}
131
132void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
133 const IR::Value& offset, std::string_view value) {
134 ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
135 ctx.var_alloc.Consume(offset), value);
136}
137
138void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
139 const IR::Value& offset, std::string_view value) {
140 SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement");
141}
142
143void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
144 const IR::Value& offset, std::string_view value) {
145 SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement");
146}
147
148void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
149 const IR::Value& offset, std::string_view value) {
150 ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
151 ctx.var_alloc.Consume(offset), value);
152}
153
154void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
155 const IR::Value& offset, std::string_view value) {
156 ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
157 ctx.var_alloc.Consume(offset), value);
158}
159
160void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
161 const IR::Value& offset, std::string_view value) {
162 ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
163 ctx.var_alloc.Consume(offset), value);
164}
165
166void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
167 const IR::Value& offset, std::string_view value) {
168 ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
169 ctx.var_alloc.Consume(offset), value);
170}
171
172void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
173 const IR::Value& offset, std::string_view value) {
174 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
175 ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
176 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
177 binding.U32(), ctx.var_alloc.Consume(offset));
178 ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
179 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
180 binding.U32(), ctx.var_alloc.Consume(offset), value);
181}
182
183void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
184 const IR::Value& offset, std::string_view value) {
185 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
186 ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
187 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
188 binding.U32(), ctx.var_alloc.Consume(offset));
189 ctx.Add("for(int i=0;i<2;++i){{ "
190 "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
191 ");}}",
192 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
193 binding.U32(), ctx.var_alloc.Consume(offset), value);
194}
195
196void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
197 const IR::Value& offset, std::string_view value) {
198 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
199 ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
200 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
201 binding.U32(), ctx.var_alloc.Consume(offset));
202 ctx.Add("for(int i=0;i<2;++i){{ "
203 "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}",
204 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
205 binding.U32(), ctx.var_alloc.Consume(offset), value);
206}
207
208void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
209 const IR::Value& offset, std::string_view value) {
210 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
211 ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
212 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
213 binding.U32(), ctx.var_alloc.Consume(offset));
214 ctx.Add("for(int i=0;i<2;++i){{ "
215 "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
216 ");}}",
217 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
218 binding.U32(), ctx.var_alloc.Consume(offset), value);
219}
220
221void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
222 const IR::Value& offset, std::string_view value) {
223 LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
224 ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
225 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
226 binding.U32(), ctx.var_alloc.Consume(offset));
227 ctx.Add("for(int "
228 "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}"
229 "))[i]);}}",
230 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
231 binding.U32(), ctx.var_alloc.Consume(offset), value);
232}
233
234void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
235 const IR::Value& offset, std::string_view value) {
236 ctx.AddU64(
237 "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
238 "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
239 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
240 binding.U32(), ctx.var_alloc.Consume(offset), value);
241}
242
243void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
244 const IR::Value& offset, std::string_view value) {
245 ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
246 "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
247 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
248 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
249}
250
251void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
252 const IR::Value& offset, std::string_view value) {
253 ctx.AddU64(
254 "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
255 "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
256 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
257 binding.U32(), ctx.var_alloc.Consume(offset), value);
258}
259
260void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
261 const IR::Value& offset, std::string_view value) {
262 ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
263 "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
264 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
265 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
266}
267
268void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
269 const IR::Value& offset, std::string_view value) {
270 SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
271}
272
273void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
274 const IR::Value& offset, std::string_view value) {
275 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2");
276}
277
278void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
279 const IR::Value& offset, std::string_view value) {
280 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2");
281}
282
283void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
284 const IR::Value& offset, std::string_view value) {
285 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2");
286}
287
288void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
289 const IR::Value& offset, std::string_view value) {
290 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2");
291}
292
293void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
294 const IR::Value& offset, std::string_view value) {
295 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2");
296}
297
298void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
299 const IR::Value& offset, std::string_view value) {
300 SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2");
301}
302
303void EmitGlobalAtomicIAdd32(EmitContext&) {
304    throw NotImplementedException("GLSL Instruction");
305}
306
307void EmitGlobalAtomicSMin32(EmitContext&) {
308    throw NotImplementedException("GLSL Instruction");
309}
310
311void EmitGlobalAtomicUMin32(EmitContext&) {
312    throw NotImplementedException("GLSL Instruction");
313}
314
315void EmitGlobalAtomicSMax32(EmitContext&) {
316    throw NotImplementedException("GLSL Instruction");
317}
318
319void EmitGlobalAtomicUMax32(EmitContext&) {
320    throw NotImplementedException("GLSL Instruction");
321}
322
323void EmitGlobalAtomicInc32(EmitContext&) {
324    throw NotImplementedException("GLSL Instruction");
325}
326
327void EmitGlobalAtomicDec32(EmitContext&) {
328    throw NotImplementedException("GLSL Instruction");
329}
330
331void EmitGlobalAtomicAnd32(EmitContext&) {
332    throw NotImplementedException("GLSL Instruction");
333}
334
335void EmitGlobalAtomicOr32(EmitContext&) {
336    throw NotImplementedException("GLSL Instruction");
337}
338
339void EmitGlobalAtomicXor32(EmitContext&) {
340    throw NotImplementedException("GLSL Instruction");
341}
342
343void EmitGlobalAtomicExchange32(EmitContext&) {
344    throw NotImplementedException("GLSL Instruction");
345}
346
347void EmitGlobalAtomicIAdd64(EmitContext&) {
348    throw NotImplementedException("GLSL Instruction");
349}
350
351void EmitGlobalAtomicSMin64(EmitContext&) {
352    throw NotImplementedException("GLSL Instruction");
353}
354
355void EmitGlobalAtomicUMin64(EmitContext&) {
356    throw NotImplementedException("GLSL Instruction");
357}
358
359void EmitGlobalAtomicSMax64(EmitContext&) {
360    throw NotImplementedException("GLSL Instruction");
361}
362
363void EmitGlobalAtomicUMax64(EmitContext&) {
364    throw NotImplementedException("GLSL Instruction");
365}
366
367void EmitGlobalAtomicInc64(EmitContext&) {
368    throw NotImplementedException("GLSL Instruction");
369}
370
371void EmitGlobalAtomicDec64(EmitContext&) {
372    throw NotImplementedException("GLSL Instruction");
373}
374
375void EmitGlobalAtomicAnd64(EmitContext&) {
376    throw NotImplementedException("GLSL Instruction");
377}
378
379void EmitGlobalAtomicOr64(EmitContext&) {
380    throw NotImplementedException("GLSL Instruction");
381}
382
383void EmitGlobalAtomicXor64(EmitContext&) {
384    throw NotImplementedException("GLSL Instruction");
385}
386
387void EmitGlobalAtomicExchange64(EmitContext&) {
388    throw NotImplementedException("GLSL Instruction");
389}
390
391void EmitGlobalAtomicAddF32(EmitContext&) {
392    throw NotImplementedException("GLSL Instruction");
393}
394
395void EmitGlobalAtomicAddF16x2(EmitContext&) {
396    throw NotImplementedException("GLSL Instruction");
397}
398
399void EmitGlobalAtomicAddF32x2(EmitContext&) {
400    throw NotImplementedException("GLSL Instruction");
401}
402
403void EmitGlobalAtomicMinF16x2(EmitContext&) {
404    throw NotImplementedException("GLSL Instruction");
405}
406
407void EmitGlobalAtomicMinF32x2(EmitContext&) {
408    throw NotImplementedException("GLSL Instruction");
409}
410
411void EmitGlobalAtomicMaxF16x2(EmitContext&) {
412    throw NotImplementedException("GLSL Instruction");
413}
414
415void EmitGlobalAtomicMaxF32x2(EmitContext&) {
416    throw NotImplementedException("GLSL Instruction");
417}
418} // namespace Shader::Backend::GLSL
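The SharedCasFunction/SsboCasFunction helpers above expand the cas_loop format string into a retry loop around atomicCompSwap, for operations that have no direct GLSL atomic on the declared uint arrays (wrapping increment/decrement, signed min/max, float adds). An idealized host-side analogue of that pattern is sketched below; the names are illustrative and this is not backend code.

#include <atomic>
#include <cstdint>

// Same wrap-to-zero semantics as the emitted CasIncrement GLSL helper.
uint32_t CasIncrement(uint32_t value, uint32_t wrap) {
    return value >= wrap ? 0u : value + 1u;
}

// Retry until no other thread modified the word between our read and the
// compare-and-swap; compare_exchange_weak reloads 'old' on failure.
uint32_t AtomicIncWrap(std::atomic<uint32_t>& word, uint32_t wrap) {
    uint32_t old = word.load();
    while (!word.compare_exchange_weak(old, CasIncrement(old, wrap))) {
    }
    return old; // the value observed just before the successful swap
}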
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
new file mode 100644
index 000000000..e1d1b558e
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
@@ -0,0 +1,21 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/glsl/emit_context.h"
6#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::Backend::GLSL {
10void EmitBarrier(EmitContext& ctx) {
11 ctx.Add("barrier();");
12}
13
14void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
15 ctx.Add("groupMemoryBarrier();");
16}
17
18void EmitDeviceMemoryBarrier(EmitContext& ctx) {
19 ctx.Add("memoryBarrier();");
20}
21} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
new file mode 100644
index 000000000..3c1714e89
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13void Alias(IR::Inst& inst, const IR::Value& value) {
14 if (value.IsImmediate()) {
15 return;
16 }
17 IR::Inst& value_inst{*value.InstRecursive()};
18 value_inst.DestructiveAddUsage(inst.UseCount());
19 value_inst.DestructiveRemoveUsage();
20 inst.SetDefinition(value_inst.Definition<Id>());
21}
22} // Anonymous namespace
23
24void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
25 Alias(inst, value);
26}
27
28void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
29 // Fake one usage to get a real variable out of the condition
30 inst.DestructiveAddUsage(1);
31 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
32 const auto input{ctx.var_alloc.Consume(value)};
33 if (ret != input) {
34 ctx.Add("{}={};", ret, input);
35 }
36}
37
38void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
39 NotImplemented();
40}
41
42void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
43 ctx.AddU32("{}=ftou({});", inst, value);
44}
45
46void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
47 ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);
48}
49
50void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
51 NotImplemented();
52}
53
54void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
55 ctx.AddF32("{}=utof({});", inst, value);
56}
57
58void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
59 ctx.AddF64("{}=uint64BitsToDouble({});", inst, value);
60}
61
62void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
63 ctx.AddU64("{}=packUint2x32({});", inst, value);
64}
65
66void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
67 ctx.AddU32x2("{}=unpackUint2x32({});", inst, value);
68}
69
70void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
71 ctx.AddU32("{}=packFloat2x16({});", inst, value);
72}
73
74void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
75 ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value);
76}
77
78void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
79 ctx.AddU32("{}=packHalf2x16({});", inst, value);
80}
81
82void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
83 ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value);
84}
85
86void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
87 ctx.AddF64("{}=packDouble2x32({});", inst, value);
88}
89
90void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
91 ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value);
92}
93
94} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
new file mode 100644
index 000000000..49a66e3ec
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
@@ -0,0 +1,219 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
13constexpr std::string_view SWIZZLE{"xyzw"};
14void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite,
15 std::string_view object, u32 index) {
16 if (result == composite) {
17 // The result is aliased with the composite
18 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
19 } else {
20 ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object);
21 }
22}
23} // Anonymous namespace
24
25void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
26 std::string_view e2) {
27 ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2);
28}
29
30void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
31 std::string_view e2, std::string_view e3) {
32 ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3);
33}
34
35void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
36 std::string_view e2, std::string_view e3, std::string_view e4) {
37 ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4);
38}
39
40void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
41 u32 index) {
42 ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
43}
44
45void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
46 u32 index) {
47 ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
48}
49
50void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
51 u32 index) {
52 ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
53}
54
55void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
56 std::string_view object, u32 index) {
57 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
58 CompositeInsert(ctx, ret, composite, object, index);
59}
60
61void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
62 std::string_view object, u32 index) {
63 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)};
64 CompositeInsert(ctx, ret, composite, object, index);
65}
66
67void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
68 std::string_view object, u32 index) {
69 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)};
70 CompositeInsert(ctx, ret, composite, object, index);
71}
72
73void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx,
74 [[maybe_unused]] std::string_view e1,
75 [[maybe_unused]] std::string_view e2) {
76 NotImplemented();
77}
78
79void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx,
80 [[maybe_unused]] std::string_view e1,
81 [[maybe_unused]] std::string_view e2,
82 [[maybe_unused]] std::string_view e3) {
83 NotImplemented();
84}
85
86void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx,
87 [[maybe_unused]] std::string_view e1,
88 [[maybe_unused]] std::string_view e2,
89 [[maybe_unused]] std::string_view e3,
90 [[maybe_unused]] std::string_view e4) {
91 NotImplemented();
92}
93
94void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
95 [[maybe_unused]] std::string_view composite,
96 [[maybe_unused]] u32 index) {
97 NotImplemented();
98}
99
100void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
101 [[maybe_unused]] std::string_view composite,
102 [[maybe_unused]] u32 index) {
103 NotImplemented();
104}
105
106void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
107 [[maybe_unused]] std::string_view composite,
108 [[maybe_unused]] u32 index) {
109 NotImplemented();
110}
111
112void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
113 [[maybe_unused]] std::string_view composite,
114 [[maybe_unused]] std::string_view object,
115 [[maybe_unused]] u32 index) {
116 NotImplemented();
117}
118
119void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
120 [[maybe_unused]] std::string_view composite,
121 [[maybe_unused]] std::string_view object,
122 [[maybe_unused]] u32 index) {
123 NotImplemented();
124}
125
126void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
127 [[maybe_unused]] std::string_view composite,
128 [[maybe_unused]] std::string_view object,
129 [[maybe_unused]] u32 index) {
130 NotImplemented();
131}
132
133void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
134 std::string_view e2) {
135 ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2);
136}
137
138void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
139 std::string_view e2, std::string_view e3) {
140 ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3);
141}
142
143void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
144 std::string_view e2, std::string_view e3, std::string_view e4) {
145 ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4);
146}
147
148void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
149 u32 index) {
150 ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
151}
152
153void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
154 u32 index) {
155 ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
156}
157
158void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
159 u32 index) {
160 ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
161}
162
163void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
164 std::string_view object, u32 index) {
165 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)};
166 CompositeInsert(ctx, ret, composite, object, index);
167}
168
169void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
170 std::string_view object, u32 index) {
171 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)};
172 CompositeInsert(ctx, ret, composite, object, index);
173}
174
175void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
176 std::string_view object, u32 index) {
177 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
178 CompositeInsert(ctx, ret, composite, object, index);
179}
180
181void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
182 NotImplemented();
183}
184
185void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
186 NotImplemented();
187}
188
189void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
190 NotImplemented();
191}
192
193void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
194 NotImplemented();
195}
196
197void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
198 NotImplemented();
199}
200
201void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
202 NotImplemented();
203}
204
205void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
206 u32 index) {
207 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
208}
209
210void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
211 u32 index) {
212 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
213}
214
215void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
216 u32 index) {
217 ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
218}
219} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
new file mode 100644
index 000000000..580063fa9
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -0,0 +1,456 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11#include "shader_recompiler/runtime_info.h"
12
13namespace Shader::Backend::GLSL {
14namespace {
15constexpr char SWIZZLE[]{"xyzw"};
16
17u32 CbufIndex(u32 offset) {
18 return (offset / 4) % 4;
19}
20
21char OffsetSwizzle(u32 offset) {
22 return SWIZZLE[CbufIndex(offset)];
23}
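// The constant buffer is exposed as an array of vec4s, so a byte offset maps to array
// element offset/16 and component (offset/4)%4; e.g. offset 0x24 selects element 2,
// component 'y'.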
24
25bool IsInputArray(Stage stage) {
26 return stage == Stage::Geometry || stage == Stage::TessellationControl ||
27 stage == Stage::TessellationEval;
28}
29
30std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
31 return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
32}
33
34std::string_view OutputVertexIndex(EmitContext& ctx) {
35 return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
36}
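// Geometry and tessellation stages receive per-vertex inputs as arrays, so input loads are
// subscripted with the caller-provided vertex; tessellation control outputs are subscripted
// with gl_InvocationID. All other stages use plain, non-arrayed variables.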
37
38void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
39 const IR::Value& offset, u32 num_bits, std::string_view cast = {},
40 std::string_view bit_offset = {}) {
41 const bool is_immediate{offset.IsImmediate()};
42 const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug};
43 if (is_immediate) {
44 const s32 signed_offset{static_cast<s32>(offset.U32())};
45 static constexpr u32 cbuf_size{0x10000};
46 if (signed_offset < 0 || offset.U32() > cbuf_size) {
47 LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
48 ctx.Add("{}=0u;", ret);
49 return;
50 }
51 }
52 const auto offset_var{ctx.var_alloc.Consume(offset)};
53 const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16)
54 : fmt::format("{}>>4", offset_var)};
55 const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
56 : fmt::format("[({}>>2)%4]", offset_var)};
57
58 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
59 const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
60 const auto extraction{num_bits == 32 ? cbuf_cast
61                                          : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
62 bit_offset, num_bits)};
63 if (!component_indexing_bug) {
64 const auto result{fmt::format(fmt::runtime(extraction), swizzle)};
65 ctx.Add("{}={};", ret, result);
66 return;
67 }
68 const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
69 for (u32 i = 0; i < 4; ++i) {
70 const auto swizzle_string{fmt::format(".{}", "xyzw"[i])};
71 const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)};
72 ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result);
73 }
74}
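// Illustrative GetCbuf output (buffer names depend on the stage prefix and the requested
// cast): an immediate 32-bit load at byte offset 0x20 from binding 0 emits roughly
// "ret=ftou(<stage>_cbuf0[2].x);". A dynamic offset selects the component as
// "[(offset>>2)%4]" instead, and when the driver has the component indexing bug one guarded
// assignment per possible component is emitted.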
75
76void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
77 std::string_view cast) {
78 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
79 if (offset.IsImmediate()) {
80 const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)};
81 GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
82 } else {
83 const auto offset_var{ctx.var_alloc.Consume(offset)};
84 const auto bit_offset{fmt::format("({}%4)*8", offset_var)};
85 GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
86 }
87}
88
89void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
90 std::string_view cast) {
91 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
92 if (offset.IsImmediate()) {
93 const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)};
94 GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
95 } else {
96 const auto offset_var{ctx.var_alloc.Consume(offset)};
97 const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)};
98 GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
99 }
100}
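// 8- and 16-bit loads read the containing 32-bit word and extract the requested bits with
// bitfieldExtract: byte loads use bit offset (offset%4)*8, halfword loads ((offset>>1)%2)*16.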
101
102u32 TexCoordIndex(IR::Attribute attr) {
103 return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
104}
105} // Anonymous namespace
106
107void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
108 const IR::Value& offset) {
109 GetCbuf8(ctx, inst, binding, offset, "ftou");
110}
111
112void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
113 const IR::Value& offset) {
114 GetCbuf8(ctx, inst, binding, offset, "ftoi");
115}
116
117void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
118 const IR::Value& offset) {
119 GetCbuf16(ctx, inst, binding, offset, "ftou");
120}
121
122void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
123 const IR::Value& offset) {
124 GetCbuf16(ctx, inst, binding, offset, "ftoi");
125}
126
127void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
128 const IR::Value& offset) {
129 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
130 GetCbuf(ctx, ret, binding, offset, 32, "ftou");
131}
132
133void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
134 const IR::Value& offset) {
135 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
136 GetCbuf(ctx, ret, binding, offset, 32);
137}
138
139void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
140 const IR::Value& offset) {
141 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
142 if (offset.IsImmediate()) {
143 static constexpr u32 cbuf_size{0x10000};
144 const u32 u32_offset{offset.U32()};
145 const s32 signed_offset{static_cast<s32>(offset.U32())};
146 if (signed_offset < 0 || u32_offset > cbuf_size) {
147 LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
148 ctx.AddU32x2("{}=uvec2(0u);", inst);
149 return;
150 }
151 if (u32_offset % 2 == 0) {
152 ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
153 OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
154 } else {
155 ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
156 OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
157 OffsetSwizzle(u32_offset + 4));
158 }
159 return;
160 }
161 const auto offset_var{ctx.var_alloc.Consume(offset)};
162 if (!ctx.profile.has_gl_component_indexing_bug) {
163 ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
164 inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
165 return;
166 }
167 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
168 const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
169 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
170 ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
171 swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
172 "xyzw"[(swizzle + 1) % 4]);
173 }
174}
175
176void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
177 std::string_view vertex) {
178 const u32 element{static_cast<u32>(attr) % 4};
179 const char swizzle{"xyzw"[element]};
180 if (IR::IsGeneric(attr)) {
181 const u32 index{IR::GenericAttributeIndex(attr)};
182 if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
183 if (element == 3) {
184 ctx.AddF32("{}=1.f;", inst, attr);
185 } else {
186 ctx.AddF32("{}=0.f;", inst, attr);
187 }
188 return;
189 }
190 ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
191 return;
192 }
193 // GLSL only exposes 8 legacy texcoords
194 if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
195 LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
196 TexCoordIndex(attr));
197 ctx.AddF32("{}=0.f;", inst);
198 return;
199 }
200 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
201 const u32 index{TexCoordIndex(attr)};
202 ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle);
203 return;
204 }
205 switch (attr) {
206 case IR::Attribute::PrimitiveId:
207 ctx.AddF32("{}=itof(gl_PrimitiveID);", inst);
208 break;
209 case IR::Attribute::PositionX:
210 case IR::Attribute::PositionY:
211 case IR::Attribute::PositionZ:
212 case IR::Attribute::PositionW: {
213 const bool is_array{IsInputArray(ctx.stage)};
214 const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""};
215 ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle);
216 break;
217 }
218 case IR::Attribute::ColorFrontDiffuseR:
219 case IR::Attribute::ColorFrontDiffuseG:
220 case IR::Attribute::ColorFrontDiffuseB:
221 case IR::Attribute::ColorFrontDiffuseA:
222 if (ctx.stage == Stage::Fragment) {
223 ctx.AddF32("{}=gl_Color.{};", inst, swizzle);
224 } else {
225 ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle);
226 }
227 break;
228 case IR::Attribute::PointSpriteS:
229 case IR::Attribute::PointSpriteT:
230 ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle);
231 break;
232 case IR::Attribute::TessellationEvaluationPointU:
233 case IR::Attribute::TessellationEvaluationPointV:
234 ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle);
235 break;
236 case IR::Attribute::InstanceId:
237 ctx.AddF32("{}=itof(gl_InstanceID);", inst);
238 break;
239 case IR::Attribute::VertexId:
240 ctx.AddF32("{}=itof(gl_VertexID);", inst);
241 break;
242 case IR::Attribute::FrontFace:
243 ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
244 break;
245 default:
246 throw NotImplementedException("Get attribute {}", attr);
247 }
248}
249
250void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
251 [[maybe_unused]] std::string_view vertex) {
252 if (IR::IsGeneric(attr)) {
253 const u32 index{IR::GenericAttributeIndex(attr)};
254 const u32 attr_element{IR::GenericAttributeElement(attr)};
255 const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
256 const auto output_decorator{OutputVertexIndex(ctx)};
257 if (info.num_components == 1) {
258 ctx.Add("{}{}={};", info.name, output_decorator, value);
259 } else {
260 const u32 index_element{attr_element - info.first_element};
261 ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value);
262 }
263 return;
264 }
265 const u32 element{static_cast<u32>(attr) % 4};
266 const char swizzle{"xyzw"[element]};
267 // GLSL only exposes 8 legacy texcoords
268 if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
269 LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
270 TexCoordIndex(attr));
271 return;
272 }
273 if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
274 const u32 index{TexCoordIndex(attr)};
275 ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value);
276 return;
277 }
278 switch (attr) {
279 case IR::Attribute::Layer:
280 if (ctx.stage != Stage::Geometry &&
281 !ctx.profile.support_viewport_index_layer_non_geometry) {
282 LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support "
283 "viewport layer extension");
284 break;
285 }
286 ctx.Add("gl_Layer=ftoi({});", value);
287 break;
288 case IR::Attribute::ViewportIndex:
289 if (ctx.stage != Stage::Geometry &&
290 !ctx.profile.support_viewport_index_layer_non_geometry) {
291 LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support "
292 "viewport layer extension");
293 break;
294 }
295 ctx.Add("gl_ViewportIndex=ftoi({});", value);
296 break;
297 case IR::Attribute::ViewportMask:
298 if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) {
299 LOG_WARNING(
300 Shader_GLSL,
301 "Shader stores viewport mask but device does not support viewport mask extension");
302 break;
303 }
304 ctx.Add("gl_ViewportMask[0]=ftoi({});", value);
305 break;
306 case IR::Attribute::PointSize:
307 ctx.Add("gl_PointSize={};", value);
308 break;
309 case IR::Attribute::PositionX:
310 case IR::Attribute::PositionY:
311 case IR::Attribute::PositionZ:
312 case IR::Attribute::PositionW:
313 ctx.Add("gl_Position.{}={};", swizzle, value);
314 break;
315 case IR::Attribute::ColorFrontDiffuseR:
316 case IR::Attribute::ColorFrontDiffuseG:
317 case IR::Attribute::ColorFrontDiffuseB:
318 case IR::Attribute::ColorFrontDiffuseA:
319 ctx.Add("gl_FrontColor.{}={};", swizzle, value);
320 break;
321 case IR::Attribute::ColorFrontSpecularR:
322 case IR::Attribute::ColorFrontSpecularG:
323 case IR::Attribute::ColorFrontSpecularB:
324 case IR::Attribute::ColorFrontSpecularA:
325 ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value);
326 break;
327 case IR::Attribute::ColorBackDiffuseR:
328 case IR::Attribute::ColorBackDiffuseG:
329 case IR::Attribute::ColorBackDiffuseB:
330 case IR::Attribute::ColorBackDiffuseA:
331 ctx.Add("gl_BackColor.{}={};", swizzle, value);
332 break;
333 case IR::Attribute::ColorBackSpecularR:
334 case IR::Attribute::ColorBackSpecularG:
335 case IR::Attribute::ColorBackSpecularB:
336 case IR::Attribute::ColorBackSpecularA:
337 ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
338 break;
339 case IR::Attribute::FogCoordinate:
340 ctx.Add("gl_FogFragCoord={};", value);
341 break;
342 case IR::Attribute::ClipDistance0:
343 case IR::Attribute::ClipDistance1:
344 case IR::Attribute::ClipDistance2:
345 case IR::Attribute::ClipDistance3:
346 case IR::Attribute::ClipDistance4:
347 case IR::Attribute::ClipDistance5:
348 case IR::Attribute::ClipDistance6:
349 case IR::Attribute::ClipDistance7: {
350 const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
351 ctx.Add("gl_ClipDistance[{}]={};", index, value);
352 break;
353 }
354 default:
355 throw NotImplementedException("Set attribute {}", attr);
356 }
357}
358
359void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
360 std::string_view vertex) {
361 const bool is_array{ctx.stage == Stage::Geometry};
362 const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""};
363 ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg);
364}
365
366void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx,
367 [[maybe_unused]] std::string_view offset,
368 [[maybe_unused]] std::string_view value,
369 [[maybe_unused]] std::string_view vertex) {
370 NotImplemented();
371}
372
373void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
374 if (!IR::IsGeneric(patch)) {
375 throw NotImplementedException("Non-generic patch load");
376 }
377 const u32 index{IR::GenericPatchIndex(patch)};
378 const u32 element{IR::GenericPatchElement(patch)};
379 const char swizzle{"xyzw"[element]};
380 ctx.AddF32("{}=patch{}.{};", inst, index, swizzle);
381}
382
383void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) {
384 if (IR::IsGeneric(patch)) {
385 const u32 index{IR::GenericPatchIndex(patch)};
386 const u32 element{IR::GenericPatchElement(patch)};
387 ctx.Add("patch{}.{}={};", index, "xyzw"[element], value);
388 return;
389 }
390 switch (patch) {
391 case IR::Patch::TessellationLodLeft:
392 case IR::Patch::TessellationLodRight:
393 case IR::Patch::TessellationLodTop:
394 case IR::Patch::TessellationLodBottom: {
395        const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)};
396 ctx.Add("gl_TessLevelOuter[{}]={};", index, value);
397 break;
398 }
399 case IR::Patch::TessellationLodInteriorU:
400 ctx.Add("gl_TessLevelInner[0]={};", value);
401 break;
402 case IR::Patch::TessellationLodInteriorV:
403 ctx.Add("gl_TessLevelInner[1]={};", value);
404 break;
405 default:
406 throw NotImplementedException("Patch {}", patch);
407 }
408}
409
410void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) {
411 const char swizzle{"xyzw"[component]};
412 ctx.Add("frag_color{}.{}={};", index, swizzle, value);
413}
414
415void EmitSetSampleMask(EmitContext& ctx, std::string_view value) {
416 ctx.Add("gl_SampleMask[0]=int({});", value);
417}
418
419void EmitSetFragDepth(EmitContext& ctx, std::string_view value) {
420 ctx.Add("gl_FragDepth={};", value);
421}
422
423void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
424 ctx.AddU32x3("{}=gl_LocalInvocationID;", inst);
425}
426
427void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
428 ctx.AddU32x3("{}=gl_WorkGroupID;", inst);
429}
430
431void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
432 ctx.AddU32("{}=uint(gl_InvocationID);", inst);
433}
434
435void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
436 ctx.AddU32("{}=uint(gl_SampleID);", inst);
437}
438
439void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
440 ctx.AddU1("{}=gl_HelperInvocation;", inst);
441}
442
443void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
444 ctx.uses_y_direction = true;
445 ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
446}
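// The Y-flip direction is not a GLSL built-in; the host side is expected to provide it
// through the legacy gl_FrontMaterial.ambient.a state when uses_y_direction is set.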
447
448void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
449 ctx.AddU32("{}=lmem[{}];", inst, word_offset);
450}
451
452void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) {
453 ctx.Add("lmem[{}]={};", word_offset, value);
454}
455
456} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
new file mode 100644
index 000000000..53f8896be
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
@@ -0,0 +1,21 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/exception.h"
10
11namespace Shader::Backend::GLSL {
12
13void EmitJoin(EmitContext&) {
14 throw NotImplementedException("Join shouldn't be emitted");
15}
16
17void EmitDemoteToHelperInvocation(EmitContext& ctx) {
18 ctx.Add("discard;");
19}
20
21} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
new file mode 100644
index 000000000..eeae6562c
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
@@ -0,0 +1,230 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
13 [[maybe_unused]] std::string_view value) {
14 NotImplemented();
15}
16
17void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
18 ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value);
19}
20
21void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
22 [[maybe_unused]] std::string_view value) {
23 NotImplemented();
24}
25
26void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
27 [[maybe_unused]] std::string_view value) {
28 NotImplemented();
29}
30
31void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
32 ctx.AddU32("{}=int({});", inst, value);
33}
34
35void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
36 ctx.AddU32("{}=int({});", inst, value);
37}
38
39void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
40 [[maybe_unused]] std::string_view value) {
41 NotImplemented();
42}
43
44void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
45 ctx.AddU64("{}=int64_t({});", inst, value);
46}
47
48void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
49 ctx.AddU64("{}=int64_t({});", inst, value);
50}
51
52void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
53 [[maybe_unused]] std::string_view value) {
54 NotImplemented();
55}
56
57void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
58 [[maybe_unused]] std::string_view value) {
59 NotImplemented();
60}
61
62void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
63 [[maybe_unused]] std::string_view value) {
64 NotImplemented();
65}
66
67void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
68 [[maybe_unused]] std::string_view value) {
69 NotImplemented();
70}
71
72void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
73 ctx.AddU32("{}=uint({});", inst, value);
74}
75
76void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
77 ctx.AddU32("{}=uint({});", inst, value);
78}
79
80void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
81 [[maybe_unused]] std::string_view value) {
82 NotImplemented();
83}
84
85void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
86 ctx.AddU64("{}=uint64_t({});", inst, value);
87}
88
89void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
90 ctx.AddU64("{}=uint64_t({});", inst, value);
91}
92
93void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
94 ctx.AddU64("{}=uint64_t({});", inst, value);
95}
96
97void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
98 ctx.AddU32("{}=uint({});", inst, value);
99}
100
101void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
102 [[maybe_unused]] std::string_view value) {
103 NotImplemented();
104}
105
106void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
107 [[maybe_unused]] std::string_view value) {
108 NotImplemented();
109}
110
111void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
112 ctx.AddF32("{}=float({});", inst, value);
113}
114
115void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
116 ctx.AddF64("{}=double({});", inst, value);
117}
118
119void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
120 [[maybe_unused]] std::string_view value) {
121 NotImplemented();
122}
123
124void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
125 [[maybe_unused]] std::string_view value) {
126 NotImplemented();
127}
128
129void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
130 [[maybe_unused]] std::string_view value) {
131 NotImplemented();
132}
133
134void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
135 [[maybe_unused]] std::string_view value) {
136 NotImplemented();
137}
138
139void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
140 [[maybe_unused]] std::string_view value) {
141 NotImplemented();
142}
143
144void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
145 [[maybe_unused]] std::string_view value) {
146 NotImplemented();
147}
148
149void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
150 [[maybe_unused]] std::string_view value) {
151 NotImplemented();
152}
153
154void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
155 [[maybe_unused]] std::string_view value) {
156 NotImplemented();
157}
158
159void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
160 [[maybe_unused]] std::string_view value) {
161 NotImplemented();
162}
163
164void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
165 [[maybe_unused]] std::string_view value) {
166 NotImplemented();
167}
168
169void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
170 ctx.AddF32("{}=float(int({}));", inst, value);
171}
172
173void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
174 ctx.AddF32("{}=float(int64_t({}));", inst, value);
175}
176
177void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
178 [[maybe_unused]] std::string_view value) {
179 NotImplemented();
180}
181
182void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
183 ctx.AddF32("{}=float({}&0xffff);", inst, value);
184}
185
186void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
187 ctx.AddF32("{}=float({});", inst, value);
188}
189
190void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
191 ctx.AddF32("{}=float({});", inst, value);
192}
193
194void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
195 [[maybe_unused]] std::string_view value) {
196 NotImplemented();
197}
198
199void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
200 [[maybe_unused]] std::string_view value) {
201 NotImplemented();
202}
203
204void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
205 ctx.AddF64("{}=double(int({}));", inst, value);
206}
207
208void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
209 ctx.AddF64("{}=double(int64_t({}));", inst, value);
210}
211
212void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
213 [[maybe_unused]] std::string_view value) {
214 NotImplemented();
215}
216
217void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
218 [[maybe_unused]] std::string_view value) {
219 NotImplemented();
220}
221
222void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
223 ctx.AddF64("{}=double({});", inst, value);
224}
225
226void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
227 ctx.AddF64("{}=double({});", inst, value);
228}
229
230} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
new file mode 100644
index 000000000..d423bfb1b
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -0,0 +1,456 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
14void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs,
15 std::string_view op, bool ordered) {
16 const auto nan_op{ordered ? "&&!" : "||"};
17 ctx.AddU1("{}={}{}{}"
18 "{}isnan({}){}isnan({});",
19 inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs);
20}
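// Ordered comparisons are false when either operand is NaN, unordered comparisons are true;
// e.g. Compare(ctx, inst, "a", "b", "<", true) emits "x=a<b&&!isnan(a)&&!isnan(b);" while the
// unordered variant emits "x=a<b||isnan(a)||isnan(b);" (result name illustrative).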
21
22bool IsPrecise(const IR::Inst& inst) {
23 return inst.Flags<IR::FpControl>().no_contraction;
24}
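// Instructions flagged with no_contraction take the AddPrecF32/AddPrecF64 path below instead
// of the plain AddF32/AddF64 helpers.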
25} // Anonymous namespace
26
27void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
28 [[maybe_unused]] std::string_view value) {
29 NotImplemented();
30}
31
32void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
33 ctx.AddF32("{}=abs({});", inst, value);
34}
35
36void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
37 ctx.AddF64("{}=abs({});", inst, value);
38}
39
40void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
41 [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
42 NotImplemented();
43}
44
45void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
46 if (IsPrecise(inst)) {
47 ctx.AddPrecF32("{}={}+{};", inst, a, b);
48 } else {
49 ctx.AddF32("{}={}+{};", inst, a, b);
50 }
51}
52
53void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
54 if (IsPrecise(inst)) {
55 ctx.AddPrecF64("{}={}+{};", inst, a, b);
56 } else {
57 ctx.AddF64("{}={}+{};", inst, a, b);
58 }
59}
60
61void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
62 [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b,
63 [[maybe_unused]] std::string_view c) {
64 NotImplemented();
65}
66
67void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
68 std::string_view c) {
69 if (IsPrecise(inst)) {
70 ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c);
71 } else {
72 ctx.AddF32("{}=fma({},{},{});", inst, a, b, c);
73 }
74}
75
76void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
77 std::string_view c) {
78 if (IsPrecise(inst)) {
79 ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c);
80 } else {
81 ctx.AddF64("{}=fma({},{},{});", inst, a, b, c);
82 }
83}
84
85void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
86 ctx.AddF32("{}=max({},{});", inst, a, b);
87}
88
89void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
90 ctx.AddF64("{}=max({},{});", inst, a, b);
91}
92
93void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
94 ctx.AddF32("{}=min({},{});", inst, a, b);
95}
96
97void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
98 ctx.AddF64("{}=min({},{});", inst, a, b);
99}
100
101void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
102 [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
103 NotImplemented();
104}
105
106void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
107 if (IsPrecise(inst)) {
108 ctx.AddPrecF32("{}={}*{};", inst, a, b);
109 } else {
110 ctx.AddF32("{}={}*{};", inst, a, b);
111 }
112}
113
114void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
115 if (IsPrecise(inst)) {
116 ctx.AddPrecF64("{}={}*{};", inst, a, b);
117 } else {
118 ctx.AddF64("{}={}*{};", inst, a, b);
119 }
120}
121
122void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
123 [[maybe_unused]] std::string_view value) {
124 NotImplemented();
125}
126
127void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
128 ctx.AddF32("{}=-({});", inst, value);
129}
130
131void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
132 ctx.AddF64("{}=-({});", inst, value);
133}
134
135void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
136 ctx.AddF32("{}=sin({});", inst, value);
137}
138
139void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
140 ctx.AddF32("{}=cos({});", inst, value);
141}
142
143void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
144 ctx.AddF32("{}=exp2({});", inst, value);
145}
146
147void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
148 ctx.AddF32("{}=log2({});", inst, value);
149}
150
151void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
152 ctx.AddF32("{}=(1.0f)/{};", inst, value);
153}
154
155void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
156 ctx.AddF64("{}=1.0/{};", inst, value);
157}
158
159void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst,
160                       std::string_view value) {
161    ctx.AddF32("{}=inversesqrt({});", inst, value);
162}
163
164void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
165 [[maybe_unused]] std::string_view value) {
166 NotImplemented();
167}
168
169void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
170 ctx.AddF32("{}=sqrt({});", inst, value);
171}
172
173void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
174 [[maybe_unused]] std::string_view value) {
175 NotImplemented();
176}
177
178void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
179 ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value);
180}
181
182void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
183 ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value);
184}
185
186void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
187 [[maybe_unused]] std::string_view value,
188 [[maybe_unused]] std::string_view min_value,
189 [[maybe_unused]] std::string_view max_value) {
190 NotImplemented();
191}
192
193void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
194 std::string_view min_value, std::string_view max_value) {
195 // GLSL's clamp does not produce desirable results
196 ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value);
197}
198
199void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
200 std::string_view min_value, std::string_view max_value) {
201 // GLSL's clamp does not produce desirable results
202 ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value);
203}
204
205void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
206 [[maybe_unused]] std::string_view value) {
207 NotImplemented();
208}
209
210void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
211 ctx.AddF32("{}=roundEven({});", inst, value);
212}
213
214void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
215 ctx.AddF64("{}=roundEven({});", inst, value);
216}
217
218void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
219 [[maybe_unused]] std::string_view value) {
220 NotImplemented();
221}
222
223void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
224 ctx.AddF32("{}=floor({});", inst, value);
225}
226
227void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
228 ctx.AddF64("{}=floor({});", inst, value);
229}
230
231void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
232 [[maybe_unused]] std::string_view value) {
233 NotImplemented();
234}
235
236void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
237 ctx.AddF32("{}=ceil({});", inst, value);
238}
239
240void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
241 ctx.AddF64("{}=ceil({});", inst, value);
242}
243
244void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
245 [[maybe_unused]] std::string_view value) {
246 NotImplemented();
247}
248
249void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
250 ctx.AddF32("{}=trunc({});", inst, value);
251}
252
253void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
254 ctx.AddF64("{}=trunc({});", inst, value);
255}
256
257void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
258 [[maybe_unused]] std::string_view rhs) {
259 NotImplemented();
260}
261
262void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
263 std::string_view rhs) {
264 Compare(ctx, inst, lhs, rhs, "==", true);
265}
266
267void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
268 std::string_view rhs) {
269 Compare(ctx, inst, lhs, rhs, "==", true);
270}
271
272void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
273 [[maybe_unused]] std::string_view rhs) {
274 NotImplemented();
275}
276
277void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
278 std::string_view rhs) {
279 Compare(ctx, inst, lhs, rhs, "==", false);
280}
281
282void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
283 std::string_view rhs) {
284 Compare(ctx, inst, lhs, rhs, "==", false);
285}
286
287void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
288 [[maybe_unused]] std::string_view rhs) {
289 NotImplemented();
290}
291
292void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
293 std::string_view rhs) {
294 Compare(ctx, inst, lhs, rhs, "!=", true);
295}
296
297void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
298 std::string_view rhs) {
299 Compare(ctx, inst, lhs, rhs, "!=", true);
300}
301
302void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
303 [[maybe_unused]] std::string_view rhs) {
304 NotImplemented();
305}
306
307void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
308 std::string_view rhs) {
309 Compare(ctx, inst, lhs, rhs, "!=", false);
310}
311
312void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
313 std::string_view rhs) {
314 Compare(ctx, inst, lhs, rhs, "!=", false);
315}
316
317void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
318 [[maybe_unused]] std::string_view rhs) {
319 NotImplemented();
320}
321
322void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
323 std::string_view rhs) {
324 Compare(ctx, inst, lhs, rhs, "<", true);
325}
326
327void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
328 std::string_view rhs) {
329 Compare(ctx, inst, lhs, rhs, "<", true);
330}
331
332void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
333 [[maybe_unused]] std::string_view rhs) {
334 NotImplemented();
335}
336
337void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
338 std::string_view rhs) {
339 Compare(ctx, inst, lhs, rhs, "<", false);
340}
341
342void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
343 std::string_view rhs) {
344 Compare(ctx, inst, lhs, rhs, "<", false);
345}
346
347void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx,
348 [[maybe_unused]] std::string_view lhs,
349 [[maybe_unused]] std::string_view rhs) {
350 NotImplemented();
351}
352
353void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
354 std::string_view rhs) {
355 Compare(ctx, inst, lhs, rhs, ">", true);
356}
357
358void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
359 std::string_view rhs) {
360 Compare(ctx, inst, lhs, rhs, ">", true);
361}
362
363void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx,
364 [[maybe_unused]] std::string_view lhs,
365 [[maybe_unused]] std::string_view rhs) {
366 NotImplemented();
367}
368
369void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
370 std::string_view rhs) {
371 Compare(ctx, inst, lhs, rhs, ">", false);
372}
373
374void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
375 std::string_view rhs) {
376 Compare(ctx, inst, lhs, rhs, ">", false);
377}
378
379void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx,
380 [[maybe_unused]] std::string_view lhs,
381 [[maybe_unused]] std::string_view rhs) {
382 NotImplemented();
383}
384
385void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
386 std::string_view rhs) {
387 Compare(ctx, inst, lhs, rhs, "<=", true);
388}
389
390void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
391 std::string_view rhs) {
392 Compare(ctx, inst, lhs, rhs, "<=", true);
393}
394
395void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx,
396 [[maybe_unused]] std::string_view lhs,
397 [[maybe_unused]] std::string_view rhs) {
398 NotImplemented();
399}
400
401void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
402 std::string_view rhs) {
403 Compare(ctx, inst, lhs, rhs, "<=", false);
404}
405
406void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
407 std::string_view rhs) {
408 Compare(ctx, inst, lhs, rhs, "<=", false);
409}
410
411void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
412 [[maybe_unused]] std::string_view lhs,
413 [[maybe_unused]] std::string_view rhs) {
414 NotImplemented();
415}
416
417void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
418 std::string_view rhs) {
419 Compare(ctx, inst, lhs, rhs, ">=", true);
420}
421
422void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
423 std::string_view rhs) {
424 Compare(ctx, inst, lhs, rhs, ">=", true);
425}
426
427void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
428 [[maybe_unused]] std::string_view lhs,
429 [[maybe_unused]] std::string_view rhs) {
430 NotImplemented();
431}
432
433void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
434 std::string_view rhs) {
435 Compare(ctx, inst, lhs, rhs, ">=", false);
436}
437
438void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
439 std::string_view rhs) {
440 Compare(ctx, inst, lhs, rhs, ">=", false);
441}
442
443void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
444 [[maybe_unused]] std::string_view value) {
445 NotImplemented();
446}
447
448void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
449 ctx.AddU1("{}=isnan({});", inst, value);
450}
451
452void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
453 ctx.AddU1("{}=isnan({});", inst, value);
454}
455
456} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
new file mode 100644
index 000000000..447eb8e0a
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -0,0 +1,799 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/profile.h"
12
13namespace Shader::Backend::GLSL {
14namespace {
15std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
16 const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index)
17 : ctx.textures.at(info.descriptor_index)};
18 const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
19 return fmt::format("tex{}{}", def.binding, index_offset);
20}
21
22std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
23 const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index)
24 : ctx.images.at(info.descriptor_index)};
25 const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
26 return fmt::format("img{}{}", def.binding, index_offset);
27}
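// Samplers are named "tex<binding>" and images "img<binding>"; when the descriptor spans
// more than one element the runtime index is appended as an array subscript.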
28
29std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) {
30 switch (info.type) {
31 case TextureType::Color1D:
32 case TextureType::Buffer:
33 return fmt::format("int({})", value);
34 case TextureType::ColorArray1D:
35 case TextureType::Color2D:
36 case TextureType::ColorArray2D:
37 return fmt::format("ivec2({})", value);
38 case TextureType::Color3D:
39 case TextureType::ColorCube:
40 return fmt::format("ivec3({})", value);
41 case TextureType::ColorArrayCube:
42 return fmt::format("ivec4({})", value);
43 default:
44 throw NotImplementedException("Integer cast for TextureType {}", info.type.Value());
45 }
46}
47
48std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) {
49 switch (info.type) {
50 case TextureType::Color1D:
51 case TextureType::Buffer:
52 return fmt::format("int({})", value);
53 case TextureType::ColorArray1D:
54 case TextureType::Color2D:
55 return fmt::format("ivec2({})", value);
56 case TextureType::ColorArray2D:
57 case TextureType::Color3D:
58 case TextureType::ColorCube:
59 return fmt::format("ivec3({})", value);
60 case TextureType::ColorArrayCube:
61 return fmt::format("ivec4({})", value);
62 default:
63 throw NotImplementedException("TexelFetchCast type {}", info.type.Value());
64 }
65}
66
67bool NeedsShadowLodExt(TextureType type) {
68 switch (type) {
69 case TextureType::ColorArray2D:
70 case TextureType::ColorCube:
71 case TextureType::ColorArrayCube:
72 return true;
73 default:
74 return false;
75 }
76}
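// 2D array, cube and cube array shadow samplers only gain textureLod/textureLodOffset
// overloads through GL_EXT_texture_shadow_lod; without it the emitters below fall back to
// textureGrad (or stub the result for cube arrays).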
77
78std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) {
79 if (offset.IsImmediate()) {
80 return fmt::format("int({})", offset.U32());
81 }
82 IR::Inst* const inst{offset.InstRecursive()};
83 if (inst->AreAllArgsImmediates()) {
84 switch (inst->GetOpcode()) {
85 case IR::Opcode::CompositeConstructU32x2:
86 return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32());
87 case IR::Opcode::CompositeConstructU32x3:
88 return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
89 inst->Arg(2).U32());
90 case IR::Opcode::CompositeConstructU32x4:
91 return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
92 inst->Arg(2).U32(), inst->Arg(3).U32());
93 default:
94 break;
95 }
96 }
97 const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi};
98 if (!has_var_aoffi) {
99 LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING");
100 }
101 const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"};
102 switch (offset.Type()) {
103 case IR::Type::U32:
104 return fmt::format("int({})", offset_str);
105 case IR::Type::U32x2:
106 return fmt::format("ivec2({})", offset_str);
107 case IR::Type::U32x3:
108 return fmt::format("ivec3({})", offset_str);
109 case IR::Type::U32x4:
110 return fmt::format("ivec4({})", offset_str);
111 default:
112 throw NotImplementedException("Offset type {}", offset.Type());
113 }
114}
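// Immediate offsets (including composites of immediates) are folded into constants; a truly
// dynamic offset is only passed through when the driver supports variable texture offsets,
// otherwise it is stubbed to zero as warned above.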
115
116std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) {
117 const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
118 if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
119 LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING");
120 return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))";
121 }
122 const IR::Opcode opcode{values[0]->GetOpcode()};
123 if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
124 throw LogicError("Invalid PTP arguments");
125 }
126 auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
127
128 return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0),
129 read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2),
130 read(1, 3));
131}
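// PTP gathers carry four (x,y) offsets packed in two u32x4 composites; when every component
// is an immediate they are repacked into an ivec2[4] constant for textureGatherOffsets,
// otherwise placeholder offsets are emitted.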
132
133IR::Inst* PrepareSparse(IR::Inst& inst) {
134 const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
135 if (sparse_inst) {
136 sparse_inst->Invalidate();
137 }
138 return sparse_inst;
139}
140} // Anonymous namespace
141
142void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
143 std::string_view coords, std::string_view bias_lc,
144 const IR::Value& offset) {
145 const auto info{inst.Flags<IR::TextureInstInfo>()};
146 if (info.has_lod_clamp) {
147 throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples");
148 }
149 const auto texture{Texture(ctx, info, index)};
150 const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
151 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
152 const auto sparse_inst{PrepareSparse(inst)};
153 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
154 if (sparse_inst && !supports_sparse) {
155 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
156 ctx.AddU1("{}=true;", *sparse_inst);
157 }
158 if (!sparse_inst || !supports_sparse) {
159 if (!offset.IsEmpty()) {
160 const auto offset_str{GetOffsetVec(ctx, offset)};
161 if (ctx.stage == Stage::Fragment) {
162 ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias);
163 } else {
164 ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str);
165 }
166 } else {
167 if (ctx.stage == Stage::Fragment) {
168 ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias);
169 } else {
170 ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords);
171 }
172 }
173 return;
174 }
175 if (!offset.IsEmpty()) {
176 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));",
177 *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias);
178 } else {
179 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst,
180 texture, coords, texel, bias);
181 }
182}
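// Implicit-LOD sampling is only well defined in fragment shaders, so other stages substitute
// an explicit textureLod/textureLodOffset at LOD 0. The sparse path mirrors the dense one,
// writing residency through sparseTexelsResidentARB when the device supports it.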
183
184void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
185 std::string_view coords, std::string_view lod_lc,
186 const IR::Value& offset) {
187 const auto info{inst.Flags<IR::TextureInstInfo>()};
188 if (info.has_bias) {
189 throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples");
190 }
191 if (info.has_lod_clamp) {
192 throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples");
193 }
194 const auto texture{Texture(ctx, info, index)};
195 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
196 const auto sparse_inst{PrepareSparse(inst)};
197 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
198 if (sparse_inst && !supports_sparse) {
199 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
200 ctx.AddU1("{}=true;", *sparse_inst);
201 }
202 if (!sparse_inst || !supports_sparse) {
203 if (!offset.IsEmpty()) {
204 ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc,
205 GetOffsetVec(ctx, offset));
206 } else {
207 ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc);
208 }
209 return;
210 }
211 if (!offset.IsEmpty()) {
212 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
213 *sparse_inst, texture, CastToIntVec(coords, info), lod_lc,
214 GetOffsetVec(ctx, offset), texel);
215 } else {
216 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst,
217 texture, coords, lod_lc, texel);
218 }
219}
220
221void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
222 std::string_view coords, std::string_view dref,
223 std::string_view bias_lc, const IR::Value& offset) {
224 const auto info{inst.Flags<IR::TextureInstInfo>()};
225 const auto sparse_inst{PrepareSparse(inst)};
226 if (sparse_inst) {
227 throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples");
228 }
229 if (info.has_bias) {
230 throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples");
231 }
232 if (info.has_lod_clamp) {
233 throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples");
234 }
235 const auto texture{Texture(ctx, info, index)};
236 const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
237 const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
238 const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
239 const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod &&
240 ctx.stage != Stage::Fragment && needs_shadow_ext};
241 if (use_grad) {
242 LOG_WARNING(Shader_GLSL,
243 "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
244 if (info.type == TextureType::ColorArrayCube) {
245 LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
246 ctx.AddF32("{}=0.0f;", inst);
247 return;
248 }
249 const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
250 ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
251 d_cast, d_cast);
252 return;
253 }
254 if (!offset.IsEmpty()) {
255 const auto offset_str{GetOffsetVec(ctx, offset)};
256 if (ctx.stage == Stage::Fragment) {
257 ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref,
258 offset_str, bias);
259 } else {
260 ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords,
261 dref, offset_str);
262 }
263 } else {
264 if (ctx.stage == Stage::Fragment) {
265 if (info.type == TextureType::ColorArrayCube) {
266 ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref);
267 } else {
268 ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
269 }
270 } else {
271 ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref);
272 }
273 }
274}
275
276void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
277 std::string_view coords, std::string_view dref,
278 std::string_view lod_lc, const IR::Value& offset) {
279 const auto info{inst.Flags<IR::TextureInstInfo>()};
280 const auto sparse_inst{PrepareSparse(inst)};
281 if (sparse_inst) {
282 throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples");
283 }
284 if (info.has_bias) {
285 throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples");
286 }
287 if (info.has_lod_clamp) {
288 throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples");
289 }
290 const auto texture{Texture(ctx, info, index)};
291 const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
292 const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext};
293 const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
294 if (use_grad) {
295 LOG_WARNING(Shader_GLSL,
296 "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
297 if (info.type == TextureType::ColorArrayCube) {
298 LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
299 ctx.AddF32("{}=0.0f;", inst);
300 return;
301 }
302 const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
303 ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
304 d_cast, d_cast);
305 return;
306 }
307 if (!offset.IsEmpty()) {
308 const auto offset_str{GetOffsetVec(ctx, offset)};
309 if (info.type == TextureType::ColorArrayCube) {
310 ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc,
311 offset_str);
312 } else {
313 ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords,
314 dref, lod_lc, offset_str);
315 }
316 } else {
317 if (info.type == TextureType::ColorArrayCube) {
318 ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc);
319 } else {
320 ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref,
321 lod_lc);
322 }
323 }
324}
325
326void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
327 std::string_view coords, const IR::Value& offset, const IR::Value& offset2) {
328 const auto info{inst.Flags<IR::TextureInstInfo>()};
329 const auto texture{Texture(ctx, info, index)};
330 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
331 const auto sparse_inst{PrepareSparse(inst)};
332 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
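// If the driver lacks sparse texture queries (presumably ARB_sparse_texture2), stub the
// residency result to true and emit a regular textureGather* call instead.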
333 if (sparse_inst && !supports_sparse) {
334 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
335 ctx.AddU1("{}=true;", *sparse_inst);
336 }
337 if (!sparse_inst || !supports_sparse) {
338 if (offset.IsEmpty()) {
339 ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,
340 info.gather_component);
341 return;
342 }
343 if (offset2.IsEmpty()) {
344 ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords,
345 GetOffsetVec(ctx, offset), info.gather_component);
346 return;
347 }
348 // PTP
349 const auto offsets{PtpOffsets(offset, offset2)};
350 ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets,
351 info.gather_component);
352 return;
353 }
354 if (offset.IsEmpty()) {
355 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));",
356 *sparse_inst, texture, coords, texel, info.gather_component);
357 return;
358 }
359 if (offset2.IsEmpty()) {
360 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
361 *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset),
362 texel, info.gather_component);
363 return;
364 }
365 // PTP
366 const auto offsets{PtpOffsets(offset, offset2)};
367 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},int({})));",
368 *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel,
369 info.gather_component);
370}
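// Illustrative sketch only: assuming PtpOffsets() packs the four per-texel offsets into an
// ivec2[4] constructor, the PTP path above emits GLSL roughly of the form
//     t0 = textureGatherOffsets(tex, coords, ivec2[](o0, o1, o2, o3), int(component));
// with "t0", "tex" and the offsets standing in for generated names.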
371
372void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
373 std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
374 std::string_view dref) {
375 const auto info{inst.Flags<IR::TextureInstInfo>()};
376 const auto texture{Texture(ctx, info, index)};
377 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
378 const auto sparse_inst{PrepareSparse(inst)};
379 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
380 if (sparse_inst && !supports_sparse) {
381 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
382 ctx.AddU1("{}=true;", *sparse_inst);
383 }
384 if (!sparse_inst || !supports_sparse) {
385 if (offset.IsEmpty()) {
386 ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
387 return;
388 }
389 if (offset2.IsEmpty()) {
390 ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref,
391 GetOffsetVec(ctx, offset));
392 return;
393 }
394 // PTP
395 const auto offsets{PtpOffsets(offset, offset2)};
396 ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets);
397 return;
398 }
399 if (offset.IsEmpty()) {
400 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst,
401 texture, coords, dref, texel);
402 return;
403 }
404 if (offset2.IsEmpty()) {
405 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));",
406 *sparse_inst, texture, CastToIntVec(coords, info), dref,
407 GetOffsetVec(ctx, offset), texel);
408 return;
409 }
410 // PTP
411 const auto offsets{PtpOffsets(offset, offset2)};
412 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},{}));",
413 *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel);
414}
415
416void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
417 std::string_view coords, std::string_view offset, std::string_view lod,
418 [[maybe_unused]] std::string_view ms) {
419 const auto info{inst.Flags<IR::TextureInstInfo>()};
420 if (info.has_bias) {
421 throw NotImplementedException("EmitImageFetch Bias texture samples");
422 }
423 if (info.has_lod_clamp) {
424 throw NotImplementedException("EmitImageFetch Lod clamp samples");
425 }
426 const auto texture{Texture(ctx, info, index)};
427 const auto sparse_inst{PrepareSparse(inst)};
428 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
429 const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
430 if (sparse_inst && !supports_sparse) {
431 LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
432 ctx.AddU1("{}=true;", *sparse_inst);
433 }
434 if (!sparse_inst || !supports_sparse) {
435 if (!offset.empty()) {
436 ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture,
437 CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info));
438 } else {
439 if (info.type == TextureType::Buffer) {
440 ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
441 } else {
442 ctx.Add("{}=texelFetch({},{},int({}));", texel, texture,
443 CoordsCastToInt(coords, info), lod);
444 }
445 }
446 return;
447 }
448 if (!offset.empty()) {
449 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
450 *sparse_inst, texture, CastToIntVec(coords, info), lod,
451 CastToIntVec(offset, info), texel);
452 } else {
453 ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
454 *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
455 }
456}
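// Illustrative sketch only: for a non-buffer texture with no offset and no sparse residency
// query, the fetch above reduces to GLSL of the form
//     t0 = texelFetch(tex, ivec2(coords), int(lod));
// assuming CoordsCastToInt() produces an ivec cast whose width matches the texture type.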
457
458void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
459 std::string_view lod) {
460 const auto info{inst.Flags<IR::TextureInstInfo>()};
461 const auto texture{Texture(ctx, info, index)};
462 switch (info.type) {
463 case TextureType::Color1D:
464 return ctx.AddU32x4(
465 "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst,
466 texture, lod, texture);
467 case TextureType::ColorArray1D:
468 case TextureType::Color2D:
469 case TextureType::ColorCube:
470 return ctx.AddU32x4(
471 "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst,
472 texture, lod, texture);
473 case TextureType::ColorArray2D:
474 case TextureType::Color3D:
475 case TextureType::ColorArrayCube:
476 return ctx.AddU32x4(
477 "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture,
478 lod, texture);
479 case TextureType::Buffer:
480 throw NotImplementedException("EmitImageQueryDimensions Texture buffers");
481 }
482 throw LogicError("Unspecified image type {}", info.type.Value());
483}
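// The query packs the texture size into the low components of a uvec4, zero-pads the unused
// components, and stores the mip level count from textureQueryLevels in .w.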
484
485void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
486 std::string_view coords) {
487 const auto info{inst.Flags<IR::TextureInstInfo>()};
488 const auto texture{Texture(ctx, info, index)};
489 return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords);
490}
491
492void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
493 std::string_view coords, const IR::Value& derivatives,
494 const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) {
495 const auto info{inst.Flags<IR::TextureInstInfo>()};
496 if (info.has_lod_clamp) {
497 throw NotImplementedException("EmitImageGradient Lod clamp samples");
498 }
499 const auto sparse_inst{PrepareSparse(inst)};
500 if (sparse_inst) {
501 throw NotImplementedException("EmitImageGradient Sparse");
502 }
503 if (!offset.IsEmpty()) {
504 throw NotImplementedException("EmitImageGradient offset");
505 }
506 const auto texture{Texture(ctx, info, index)};
507 const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
508 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
509 const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
510 if (multi_component) {
511 ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
512 derivatives_vec, derivatives_vec);
513 } else {
514 ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords,
515 derivatives_vec, derivatives_vec);
516 }
517}
518
519void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
520 std::string_view coords) {
521 const auto info{inst.Flags<IR::TextureInstInfo>()};
522 const auto sparse_inst{PrepareSparse(inst)};
523 if (sparse_inst) {
524 throw NotImplementedException("EmitImageRead Sparse");
525 }
526 const auto image{Image(ctx, info, index)};
527 ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info));
528}
529
530void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
531 std::string_view coords, std::string_view color) {
532 const auto info{inst.Flags<IR::TextureInstInfo>()};
533 const auto image{Image(ctx, info, index)};
534 ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color);
535}
536
537void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
538 std::string_view coords, std::string_view value) {
539 const auto info{inst.Flags<IR::TextureInstInfo>()};
540 const auto image{Image(ctx, info, index)};
541 ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
542}
543
544void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
545 std::string_view coords, std::string_view value) {
546 const auto info{inst.Flags<IR::TextureInstInfo>()};
547 const auto image{Image(ctx, info, index)};
548 ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
549 value);
550}
551
552void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
553 std::string_view coords, std::string_view value) {
554 const auto info{inst.Flags<IR::TextureInstInfo>()};
555 const auto image{Image(ctx, info, index)};
556 ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
557 value);
558}
559
560void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
561 std::string_view coords, std::string_view value) {
562 const auto info{inst.Flags<IR::TextureInstInfo>()};
563 const auto image{Image(ctx, info, index)};
564 ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
565 value);
566}
567
568void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
569 std::string_view coords, std::string_view value) {
570 const auto info{inst.Flags<IR::TextureInstInfo>()};
571 const auto image{Image(ctx, info, index)};
572 ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
573 value);
574}
575
576void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
577 std::string_view) {
578 NotImplemented();
579}
580
581void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
582 std::string_view) {
583 NotImplemented();
584}
585
586void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
587 std::string_view coords, std::string_view value) {
588 const auto info{inst.Flags<IR::TextureInstInfo>()};
589 const auto image{Image(ctx, info, index)};
590 ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
591}
592
593void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
594 std::string_view coords, std::string_view value) {
595 const auto info{inst.Flags<IR::TextureInstInfo>()};
596 const auto image{Image(ctx, info, index)};
597 ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
598}
599
600void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
601 std::string_view coords, std::string_view value) {
602 const auto info{inst.Flags<IR::TextureInstInfo>()};
603 const auto image{Image(ctx, info, index)};
604 ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
605}
606
607void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
608 std::string_view coords, std::string_view value) {
609 const auto info{inst.Flags<IR::TextureInstInfo>()};
610 const auto image{Image(ctx, info, index)};
611 ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info),
612 value);
613}
614
615void EmitBindlessImageSampleImplicitLod(EmitContext&) {
616 NotImplemented();
617}
618
619void EmitBindlessImageSampleExplicitLod(EmitContext&) {
620 NotImplemented();
621}
622
623void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
624 NotImplemented();
625}
626
627void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
628 NotImplemented();
629}
630
631void EmitBindlessImageGather(EmitContext&) {
632 NotImplemented();
633}
634
635void EmitBindlessImageGatherDref(EmitContext&) {
636 NotImplemented();
637}
638
639void EmitBindlessImageFetch(EmitContext&) {
640 NotImplemented();
641}
642
643void EmitBindlessImageQueryDimensions(EmitContext&) {
644 NotImplemented();
645}
646
647void EmitBindlessImageQueryLod(EmitContext&) {
648 NotImplemented();
649}
650
651void EmitBindlessImageGradient(EmitContext&) {
652 NotImplemented();
653}
654
655void EmitBindlessImageRead(EmitContext&) {
656 NotImplemented();
657}
658
659void EmitBindlessImageWrite(EmitContext&) {
660 NotImplemented();
661}
662
663void EmitBoundImageSampleImplicitLod(EmitContext&) {
664 NotImplemented();
665}
666
667void EmitBoundImageSampleExplicitLod(EmitContext&) {
668 NotImplemented();
669}
670
671void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
672 NotImplemented();
673}
674
675void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
676 NotImplemented();
677}
678
679void EmitBoundImageGather(EmitContext&) {
680 NotImplemented();
681}
682
683void EmitBoundImageGatherDref(EmitContext&) {
684 NotImplemented();
685}
686
687void EmitBoundImageFetch(EmitContext&) {
688 NotImplemented();
689}
690
691void EmitBoundImageQueryDimensions(EmitContext&) {
692 NotImplemented();
693}
694
695void EmitBoundImageQueryLod(EmitContext&) {
696 NotImplemented();
697}
698
699void EmitBoundImageGradient(EmitContext&) {
700 NotImplemented();
701}
702
703void EmitBoundImageRead(EmitContext&) {
704 NotImplemented();
705}
706
707void EmitBoundImageWrite(EmitContext&) {
708 NotImplemented();
709}
710
711void EmitBindlessImageAtomicIAdd32(EmitContext&) {
712 NotImplemented();
713}
714
715void EmitBindlessImageAtomicSMin32(EmitContext&) {
716 NotImplemented();
717}
718
719void EmitBindlessImageAtomicUMin32(EmitContext&) {
720 NotImplemented();
721}
722
723void EmitBindlessImageAtomicSMax32(EmitContext&) {
724 NotImplemented();
725}
726
727void EmitBindlessImageAtomicUMax32(EmitContext&) {
728 NotImplemented();
729}
730
731void EmitBindlessImageAtomicInc32(EmitContext&) {
732 NotImplemented();
733}
734
735void EmitBindlessImageAtomicDec32(EmitContext&) {
736 NotImplemented();
737}
738
739void EmitBindlessImageAtomicAnd32(EmitContext&) {
740 NotImplemented();
741}
742
743void EmitBindlessImageAtomicOr32(EmitContext&) {
744 NotImplemented();
745}
746
747void EmitBindlessImageAtomicXor32(EmitContext&) {
748 NotImplemented();
749}
750
751void EmitBindlessImageAtomicExchange32(EmitContext&) {
752 NotImplemented();
753}
754
755void EmitBoundImageAtomicIAdd32(EmitContext&) {
756 NotImplemented();
757}
758
759void EmitBoundImageAtomicSMin32(EmitContext&) {
760 NotImplemented();
761}
762
763void EmitBoundImageAtomicUMin32(EmitContext&) {
764 NotImplemented();
765}
766
767void EmitBoundImageAtomicSMax32(EmitContext&) {
768 NotImplemented();
769}
770
771void EmitBoundImageAtomicUMax32(EmitContext&) {
772 NotImplemented();
773}
774
775void EmitBoundImageAtomicInc32(EmitContext&) {
776 NotImplemented();
777}
778
779void EmitBoundImageAtomicDec32(EmitContext&) {
780 NotImplemented();
781}
782
783void EmitBoundImageAtomicAnd32(EmitContext&) {
784 NotImplemented();
785}
786
787void EmitBoundImageAtomicOr32(EmitContext&) {
788 NotImplemented();
789}
790
791void EmitBoundImageAtomicXor32(EmitContext&) {
792 NotImplemented();
793}
794
795void EmitBoundImageAtomicExchange32(EmitContext&) {
796 NotImplemented();
797}
798
799} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
new file mode 100644
index 000000000..5936d086f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -0,0 +1,702 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string_view>
8
9#include "common/common_types.h"
10
11namespace Shader::IR {
12enum class Attribute : u64;
13enum class Patch : u64;
14class Inst;
15class Value;
16} // namespace Shader::IR
17
18namespace Shader::Backend::GLSL {
19class EmitContext;
20
21#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__)
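// Used by emitters that have no GLSL lowering yet; the thrown NotImplementedException carries
// the calling function's name via __func__.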
22
23// Microinstruction emitters
24void EmitPhi(EmitContext& ctx, IR::Inst& inst);
25void EmitVoid(EmitContext& ctx);
26void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
27void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
28void EmitReference(EmitContext& ctx, const IR::Value& value);
29void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
30void EmitJoin(EmitContext& ctx);
31void EmitDemoteToHelperInvocation(EmitContext& ctx);
32void EmitBarrier(EmitContext& ctx);
33void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
34void EmitDeviceMemoryBarrier(EmitContext& ctx);
35void EmitPrologue(EmitContext& ctx);
36void EmitEpilogue(EmitContext& ctx);
37void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
38void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
39void EmitGetRegister(EmitContext& ctx);
40void EmitSetRegister(EmitContext& ctx);
41void EmitGetPred(EmitContext& ctx);
42void EmitSetPred(EmitContext& ctx);
43void EmitSetGotoVariable(EmitContext& ctx);
44void EmitGetGotoVariable(EmitContext& ctx);
45void EmitSetIndirectBranchVariable(EmitContext& ctx);
46void EmitGetIndirectBranchVariable(EmitContext& ctx);
47void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
48 const IR::Value& offset);
49void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
50 const IR::Value& offset);
51void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
52 const IR::Value& offset);
53void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
54 const IR::Value& offset);
55void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
56 const IR::Value& offset);
57void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
58 const IR::Value& offset);
59void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
60 const IR::Value& offset);
61void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
62 std::string_view vertex);
63void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
64 std::string_view vertex);
65void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
66 std::string_view vertex);
67void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value,
68 std::string_view vertex);
69void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
70void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value);
71void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value);
72void EmitSetSampleMask(EmitContext& ctx, std::string_view value);
73void EmitSetFragDepth(EmitContext& ctx, std::string_view value);
74void EmitGetZFlag(EmitContext& ctx);
75void EmitGetSFlag(EmitContext& ctx);
76void EmitGetCFlag(EmitContext& ctx);
77void EmitGetOFlag(EmitContext& ctx);
78void EmitSetZFlag(EmitContext& ctx);
79void EmitSetSFlag(EmitContext& ctx);
80void EmitSetCFlag(EmitContext& ctx);
81void EmitSetOFlag(EmitContext& ctx);
82void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
83void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
84void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
85void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
86void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
87void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
88void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
89void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
90void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
91void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
92void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
93void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
94void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
95void EmitLoadGlobalU8(EmitContext& ctx);
96void EmitLoadGlobalS8(EmitContext& ctx);
97void EmitLoadGlobalU16(EmitContext& ctx);
98void EmitLoadGlobalS16(EmitContext& ctx);
99void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address);
100void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address);
101void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address);
102void EmitWriteGlobalU8(EmitContext& ctx);
103void EmitWriteGlobalS8(EmitContext& ctx);
104void EmitWriteGlobalU16(EmitContext& ctx);
105void EmitWriteGlobalS16(EmitContext& ctx);
106void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value);
107void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value);
108void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value);
109void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
110 const IR::Value& offset);
111void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
112 const IR::Value& offset);
113void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
114 const IR::Value& offset);
115void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
116 const IR::Value& offset);
117void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
118 const IR::Value& offset);
119void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
120 const IR::Value& offset);
121void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
122 const IR::Value& offset);
123void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
124 std::string_view value);
125void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
126 std::string_view value);
127void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
128 std::string_view value);
129void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
130 std::string_view value);
131void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
132 std::string_view value);
133void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
134 std::string_view value);
135void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
136 std::string_view value);
137void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
138void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
139void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
140void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
141void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
142void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
143void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
144void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value);
145void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value);
146void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value);
147void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value);
148void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value);
149void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
150 std::string_view e2);
151void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
152 std::string_view e2, std::string_view e3);
153void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
154 std::string_view e2, std::string_view e3, std::string_view e4);
155void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
156 u32 index);
157void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
158 u32 index);
159void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
160 u32 index);
161void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
162 std::string_view object, u32 index);
163void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
164 std::string_view object, u32 index);
165void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
166 std::string_view object, u32 index);
167void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2);
168void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
169 std::string_view e3);
170void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
171 std::string_view e3, std::string_view e4);
172void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index);
173void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index);
174void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index);
175void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object,
176 u32 index);
177void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object,
178 u32 index);
179void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object,
180 u32 index);
181void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
182 std::string_view e2);
183void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
184 std::string_view e2, std::string_view e3);
185void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
186 std::string_view e2, std::string_view e3, std::string_view e4);
187void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
188 u32 index);
189void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
190 u32 index);
191void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
192 u32 index);
193void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
194 std::string_view object, u32 index);
195void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
196 std::string_view object, u32 index);
197void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
198 std::string_view object, u32 index);
199void EmitCompositeConstructF64x2(EmitContext& ctx);
200void EmitCompositeConstructF64x3(EmitContext& ctx);
201void EmitCompositeConstructF64x4(EmitContext& ctx);
202void EmitCompositeExtractF64x2(EmitContext& ctx);
203void EmitCompositeExtractF64x3(EmitContext& ctx);
204void EmitCompositeExtractF64x4(EmitContext& ctx);
205void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
206 u32 index);
207void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
208 u32 index);
209void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
210 u32 index);
211void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
212 std::string_view true_value, std::string_view false_value);
213void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value,
214 std::string_view false_value);
215void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
216 std::string_view false_value);
217void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
218 std::string_view true_value, std::string_view false_value);
219void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
220 std::string_view true_value, std::string_view false_value);
221void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
222 std::string_view false_value);
223void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
224 std::string_view true_value, std::string_view false_value);
225void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
226 std::string_view true_value, std::string_view false_value);
227void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);
228void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
229void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
230void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);
231void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
232void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
233void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
234void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
235void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
236void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
237void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
238void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
239void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
240void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
241void EmitGetZeroFromOp(EmitContext& ctx);
242void EmitGetSignFromOp(EmitContext& ctx);
243void EmitGetCarryFromOp(EmitContext& ctx);
244void EmitGetOverflowFromOp(EmitContext& ctx);
245void EmitGetSparseFromOp(EmitContext& ctx);
246void EmitGetInBoundsFromOp(EmitContext& ctx);
247void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
248void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
249void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
250void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
251void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
252void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
253void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
254 std::string_view c);
255void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
256 std::string_view c);
257void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
258 std::string_view c);
259void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
260void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
261void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
262void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
263void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
264void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
265void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
266void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
267void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
268void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
269void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value);
270void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value);
271void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
272void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
273void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
274void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
275void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
276void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
277void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value);
278void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
279void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
280void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
281void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value,
282 std::string_view min_value, std::string_view max_value);
283void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
284 std::string_view min_value, std::string_view max_value);
285void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
286 std::string_view min_value, std::string_view max_value);
287void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
288void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
289void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
290void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
291void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
292void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
293void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
294void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
295void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
296void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
297void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
298void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
299void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
300void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
301void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
302void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
303void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
304 std::string_view rhs);
305void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
306 std::string_view rhs);
307void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
308void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
309 std::string_view rhs);
310void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
311 std::string_view rhs);
312void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
313void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
314 std::string_view rhs);
315void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
316 std::string_view rhs);
317void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
318void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
319 std::string_view rhs);
320void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
321 std::string_view rhs);
322void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
323void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
324 std::string_view rhs);
325void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
326 std::string_view rhs);
327void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
328void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
329 std::string_view rhs);
330void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
331 std::string_view rhs);
332void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
333void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
334 std::string_view rhs);
335void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
336 std::string_view rhs);
337void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
338void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
339 std::string_view rhs);
340void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
341 std::string_view rhs);
342void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
343void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
344 std::string_view rhs);
345void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
346 std::string_view rhs);
347void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
348void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
349 std::string_view rhs);
350void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
351 std::string_view rhs);
352void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
353void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
354 std::string_view rhs);
355void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
356 std::string_view rhs);
357void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
358void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
359void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
360void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
361void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
362void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
363void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
364void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
365void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
366void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
367void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
368void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
369 std::string_view shift);
370void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
371 std::string_view shift);
372void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
373 std::string_view shift);
374void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
375 std::string_view shift);
376void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
377 std::string_view shift);
378void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
379 std::string_view shift);
380void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
381void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
382void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
383void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
384 std::string_view insert, std::string_view offset, std::string_view count);
385void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
386 std::string_view offset, std::string_view count);
387void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
388 std::string_view offset, std::string_view count);
389void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
390void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
391void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
392void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
393void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
394void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
395void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
396void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
397void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
398void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
399 std::string_view max);
400void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
401 std::string_view max);
402void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
403void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
404void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
405void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
406 std::string_view rhs);
407void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
408 std::string_view rhs);
409void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
410void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
411void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
412void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
413 std::string_view rhs);
414void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
415 std::string_view rhs);
416void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
417 std::string_view value);
418void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
419 std::string_view value);
420void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
421 std::string_view value);
422void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
423 std::string_view value);
424void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
425 std::string_view value);
426void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
427 std::string_view value);
428void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
429 std::string_view value);
430void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
431 std::string_view value);
432void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
433 std::string_view value);
434void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
435 std::string_view value);
436void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
437 std::string_view value);
438void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
439 std::string_view value);
440void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
441 const IR::Value& offset, std::string_view value);
442void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
443 const IR::Value& offset, std::string_view value);
444void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
445 const IR::Value& offset, std::string_view value);
446void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
447 const IR::Value& offset, std::string_view value);
448void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
449 const IR::Value& offset, std::string_view value);
450void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
451 const IR::Value& offset, std::string_view value);
452void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
453 const IR::Value& offset, std::string_view value);
454void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
455 const IR::Value& offset, std::string_view value);
456void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
457 const IR::Value& offset, std::string_view value);
458void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
459 const IR::Value& offset, std::string_view value);
460void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
461 const IR::Value& offset, std::string_view value);
462void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
463 const IR::Value& offset, std::string_view value);
464void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
465 const IR::Value& offset, std::string_view value);
466void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
467 const IR::Value& offset, std::string_view value);
468void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
469 const IR::Value& offset, std::string_view value);
470void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
471 const IR::Value& offset, std::string_view value);
472void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
473 const IR::Value& offset, std::string_view value);
474void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
475 const IR::Value& offset, std::string_view value);
476void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
477 const IR::Value& offset, std::string_view value);
478void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
479 const IR::Value& offset, std::string_view value);
480void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
481 const IR::Value& offset, std::string_view value);
482void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
483 const IR::Value& offset, std::string_view value);
484void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
485 const IR::Value& offset, std::string_view value);
486void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
487 const IR::Value& offset, std::string_view value);
488void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
489 const IR::Value& offset, std::string_view value);
490void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
491 const IR::Value& offset, std::string_view value);
492void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
493 const IR::Value& offset, std::string_view value);
494void EmitGlobalAtomicIAdd32(EmitContext& ctx);
495void EmitGlobalAtomicSMin32(EmitContext& ctx);
496void EmitGlobalAtomicUMin32(EmitContext& ctx);
497void EmitGlobalAtomicSMax32(EmitContext& ctx);
498void EmitGlobalAtomicUMax32(EmitContext& ctx);
499void EmitGlobalAtomicInc32(EmitContext& ctx);
500void EmitGlobalAtomicDec32(EmitContext& ctx);
501void EmitGlobalAtomicAnd32(EmitContext& ctx);
502void EmitGlobalAtomicOr32(EmitContext& ctx);
503void EmitGlobalAtomicXor32(EmitContext& ctx);
504void EmitGlobalAtomicExchange32(EmitContext& ctx);
505void EmitGlobalAtomicIAdd64(EmitContext& ctx);
506void EmitGlobalAtomicSMin64(EmitContext& ctx);
507void EmitGlobalAtomicUMin64(EmitContext& ctx);
508void EmitGlobalAtomicSMax64(EmitContext& ctx);
509void EmitGlobalAtomicUMax64(EmitContext& ctx);
510void EmitGlobalAtomicInc64(EmitContext& ctx);
511void EmitGlobalAtomicDec64(EmitContext& ctx);
512void EmitGlobalAtomicAnd64(EmitContext& ctx);
513void EmitGlobalAtomicOr64(EmitContext& ctx);
514void EmitGlobalAtomicXor64(EmitContext& ctx);
515void EmitGlobalAtomicExchange64(EmitContext& ctx);
516void EmitGlobalAtomicAddF32(EmitContext& ctx);
517void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
518void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
519void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
520void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
521void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
522void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
523void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
524void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
525void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
526void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value);
527void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
528void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
529void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
530void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
531void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
532void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
533void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
534void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
535void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
536void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
537void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
538void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
539void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
540void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
541void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
542void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
543void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
544void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
545void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
546void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
547void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
548void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
549void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
550void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
551void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
552void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
553void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
554void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
555void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
556void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
557void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
558void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
559void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
560void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
561void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
562void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
563void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
564void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
565void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
566void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
567void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
568void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
569void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
570void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
571void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
572void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
573void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
574void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
575void EmitBindlessImageSampleImplicitLod(EmitContext&);
576void EmitBindlessImageSampleExplicitLod(EmitContext&);
577void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
578void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
579void EmitBindlessImageGather(EmitContext&);
580void EmitBindlessImageGatherDref(EmitContext&);
581void EmitBindlessImageFetch(EmitContext&);
582void EmitBindlessImageQueryDimensions(EmitContext&);
583void EmitBindlessImageQueryLod(EmitContext&);
584void EmitBindlessImageGradient(EmitContext&);
585void EmitBindlessImageRead(EmitContext&);
586void EmitBindlessImageWrite(EmitContext&);
587void EmitBoundImageSampleImplicitLod(EmitContext&);
588void EmitBoundImageSampleExplicitLod(EmitContext&);
589void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
590void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
591void EmitBoundImageGather(EmitContext&);
592void EmitBoundImageGatherDref(EmitContext&);
593void EmitBoundImageFetch(EmitContext&);
594void EmitBoundImageQueryDimensions(EmitContext&);
595void EmitBoundImageQueryLod(EmitContext&);
596void EmitBoundImageGradient(EmitContext&);
597void EmitBoundImageRead(EmitContext&);
598void EmitBoundImageWrite(EmitContext&);
599void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
600 std::string_view coords, std::string_view bias_lc,
601 const IR::Value& offset);
602void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
603 std::string_view coords, std::string_view lod_lc,
604 const IR::Value& offset);
605void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
606 std::string_view coords, std::string_view dref,
607 std::string_view bias_lc, const IR::Value& offset);
608void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
609 std::string_view coords, std::string_view dref,
610 std::string_view lod_lc, const IR::Value& offset);
611void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
612 std::string_view coords, const IR::Value& offset, const IR::Value& offset2);
613void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
614 std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
615 std::string_view dref);
616void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
617 std::string_view coords, std::string_view offset, std::string_view lod,
618 std::string_view ms);
619void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
620 std::string_view lod);
621void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
622 std::string_view coords);
623void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
624 std::string_view coords, const IR::Value& derivatives,
625 const IR::Value& offset, const IR::Value& lod_clamp);
626void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
627 std::string_view coords);
628void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
629 std::string_view coords, std::string_view color);
630void EmitBindlessImageAtomicIAdd32(EmitContext&);
631void EmitBindlessImageAtomicSMin32(EmitContext&);
632void EmitBindlessImageAtomicUMin32(EmitContext&);
633void EmitBindlessImageAtomicSMax32(EmitContext&);
634void EmitBindlessImageAtomicUMax32(EmitContext&);
635void EmitBindlessImageAtomicInc32(EmitContext&);
636void EmitBindlessImageAtomicDec32(EmitContext&);
637void EmitBindlessImageAtomicAnd32(EmitContext&);
638void EmitBindlessImageAtomicOr32(EmitContext&);
639void EmitBindlessImageAtomicXor32(EmitContext&);
640void EmitBindlessImageAtomicExchange32(EmitContext&);
641void EmitBoundImageAtomicIAdd32(EmitContext&);
642void EmitBoundImageAtomicSMin32(EmitContext&);
643void EmitBoundImageAtomicUMin32(EmitContext&);
644void EmitBoundImageAtomicSMax32(EmitContext&);
645void EmitBoundImageAtomicUMax32(EmitContext&);
646void EmitBoundImageAtomicInc32(EmitContext&);
647void EmitBoundImageAtomicDec32(EmitContext&);
648void EmitBoundImageAtomicAnd32(EmitContext&);
649void EmitBoundImageAtomicOr32(EmitContext&);
650void EmitBoundImageAtomicXor32(EmitContext&);
651void EmitBoundImageAtomicExchange32(EmitContext&);
652void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
653 std::string_view coords, std::string_view value);
654void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
655 std::string_view coords, std::string_view value);
656void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
657 std::string_view coords, std::string_view value);
658void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
659 std::string_view coords, std::string_view value);
660void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
661 std::string_view coords, std::string_view value);
662void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
663 std::string_view coords, std::string_view value);
664void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
665 std::string_view coords, std::string_view value);
666void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
667 std::string_view coords, std::string_view value);
668void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
669 std::string_view coords, std::string_view value);
670void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
671 std::string_view coords, std::string_view value);
672void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
673 std::string_view coords, std::string_view value);
674void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
675void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
676void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
677void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
678void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
679void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
680void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
681void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
682void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
683void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
684void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
685 std::string_view index, std::string_view clamp,
686 std::string_view segmentation_mask);
687void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
688 std::string_view clamp, std::string_view segmentation_mask);
689void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
690 std::string_view index, std::string_view clamp,
691 std::string_view segmentation_mask);
692void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
693 std::string_view index, std::string_view clamp,
694 std::string_view segmentation_mask);
695void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
696 std::string_view swizzle);
697void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
698void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
699void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
700void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
701
702} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
new file mode 100644
index 000000000..38419f88f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
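// The zero/sign condition codes are represented in the IR as pseudo-operations attached to
// the producing instruction; they are only emitted when something actually reads them.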
13void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
14 IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
15 if (!zero) {
16 return;
17 }
18 ctx.AddU1("{}={}==0;", *zero, result);
19 zero->Invalidate();
20}
21
22void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
23 IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
24 if (!sign) {
25 return;
26 }
27 ctx.AddU1("{}=int({})<0;", *sign, result);
28 sign->Invalidate();
29}
30
31void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
32 char lop) {
33 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
34 ctx.Add("{}={}{}{};", result, a, lop, b);
35 SetZeroFlag(ctx, inst, result);
36 SetSignFlag(ctx, inst, result);
37}
38} // Anonymous namespace
39
40void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
41 // Compute the overflow CC first as it requires the original operand values,
42 // which may be overwritten by the result of the addition
43 if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
44 // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
45 constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
46 const auto sub_a{fmt::format("{}u-{}", s32_max, a)};
47 const auto positive_result{fmt::format("int({})>int({})", b, sub_a)};
48 const auto negative_result{fmt::format("int({})<int({})", b, sub_a)};
49 ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result);
50 overflow->Invalidate();
51 }
52 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
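    // uaddCarry performs the addition and writes the carry-out (0 or 1) to the 'carry'
    // helper variable, which is then turned into the carry condition code.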
53 if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
54 ctx.uses_cc_carry = true;
55 ctx.Add("{}=uaddCarry({},{},carry);", result, a, b);
56 ctx.AddU1("{}=carry!=0;", *carry);
57 carry->Invalidate();
58 } else {
59 ctx.Add("{}={}+{};", result, a, b);
60 }
61 SetZeroFlag(ctx, inst, result);
62 SetSignFlag(ctx, inst, result);
63}
64
65void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
66 ctx.AddU64("{}={}+{};", inst, a, b);
67}
68
69void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
70 ctx.AddU32("{}={}-{};", inst, a, b);
71}
72
73void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
74 ctx.AddU64("{}={}-{};", inst, a, b);
75}
76
77void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
78 ctx.AddU32("{}=uint({}*{});", inst, a, b);
79}
80
81void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
82 ctx.AddU32("{}=uint(-({}));", inst, value);
83}
84
85void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
86 ctx.AddU64("{}=-({});", inst, value);
87}
88
89void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
90 ctx.AddU32("{}=abs(int({}));", inst, value);
91}
92
93void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
94 std::string_view shift) {
95 ctx.AddU32("{}={}<<{};", inst, base, shift);
96}
97
98void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
99 std::string_view shift) {
100 ctx.AddU64("{}={}<<{};", inst, base, shift);
101}
102
103void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
104 std::string_view shift) {
105 ctx.AddU32("{}={}>>{};", inst, base, shift);
106}
107
108void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
109 std::string_view shift) {
110 ctx.AddU64("{}={}>>{};", inst, base, shift);
111}
112
113void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
114 std::string_view shift) {
115 ctx.AddU32("{}=int({})>>{};", inst, base, shift);
116}
117
118void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
119 std::string_view shift) {
120 ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift);
121}
122
123void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
124 BitwiseLogicalOp(ctx, inst, a, b, '&');
125}
126
127void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
128 BitwiseLogicalOp(ctx, inst, a, b, '|');
129}
130
131void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
132 BitwiseLogicalOp(ctx, inst, a, b, '^');
133}
134
135void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
136 std::string_view insert, std::string_view offset, std::string_view count) {
137 ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count);
138}
139
140void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
141 std::string_view offset, std::string_view count) {
142 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
143 ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count);
144 SetZeroFlag(ctx, inst, result);
145 SetSignFlag(ctx, inst, result);
146}
147
148void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
149 std::string_view offset, std::string_view count) {
150 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
151 ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count);
152 SetZeroFlag(ctx, inst, result);
153 SetSignFlag(ctx, inst, result);
154}
155
156void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
157 ctx.AddU32("{}=bitfieldReverse({});", inst, value);
158}
159
160void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
161 ctx.AddU32("{}=bitCount({});", inst, value);
162}
163
164void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
165 ctx.AddU32("{}=~{};", inst, value);
166}
167
168void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
169 ctx.AddU32("{}=findMSB(int({}));", inst, value);
170}
171
172void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
173 ctx.AddU32("{}=findMSB(uint({}));", inst, value);
174}
175
176void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
177 ctx.AddU32("{}=min(int({}),int({}));", inst, a, b);
178}
179
180void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
181 ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b);
182}
183
184void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
185 ctx.AddU32("{}=max(int({}),int({}));", inst, a, b);
186}
187
188void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
189 ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b);
190}
191
192void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
193 std::string_view max) {
194 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
195 ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max);
196 SetZeroFlag(ctx, inst, result);
197 SetSignFlag(ctx, inst, result);
198}
199
200void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
201 std::string_view max) {
202 const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
203 ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max);
204 SetZeroFlag(ctx, inst, result);
205 SetSignFlag(ctx, inst, result);
206}
207
208void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
209 ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs);
210}
211
212void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
213 ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs);
214}
215
216void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
217 ctx.AddU1("{}={}=={};", inst, lhs, rhs);
218}
219
220void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
221 std::string_view rhs) {
222 ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs);
223}
224
225void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
226 std::string_view rhs) {
227 ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs);
228}
229
230void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
231 std::string_view rhs) {
232 ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs);
233}
234
235void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
236 std::string_view rhs) {
237 ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs);
238}
239
240void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
241 ctx.AddU1("{}={}!={};", inst, lhs, rhs);
242}
243
244void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
245 std::string_view rhs) {
246 ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs);
247}
248
249void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
250 std::string_view rhs) {
251 ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs);
252}
253} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
new file mode 100644
index 000000000..338ff4bd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12
13void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
14 ctx.AddU1("{}={}||{};", inst, a, b);
15}
16
17void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
18 ctx.AddU1("{}={}&&{};", inst, a, b);
19}
20
21void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
22 ctx.AddU1("{}={}^^{};", inst, a, b);
23}
24
25void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
26 ctx.AddU1("{}=!{};", inst, value);
27}
28} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
new file mode 100644
index 000000000..e3957491f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -0,0 +1,202 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
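// Sub-word (8/16-bit) SSBO stores are emulated with a compare-and-swap loop: read the
// containing 32-bit word, bitfieldInsert the new value, and retry atomicCompSwap until no
// other invocation has modified the word in between.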
14constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
15 "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
16 "if(cas_result==old_value){{break;}}}}"};
17
18void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var,
19 std::string_view value, std::string_view bit_offset, u32 num_bits) {
20 const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)};
21 ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits);
22}
23} // Anonymous namespace
24
25void EmitLoadGlobalU8(EmitContext&) {
26 NotImplemented();
27}
28
29void EmitLoadGlobalS8(EmitContext&) {
30 NotImplemented();
31}
32
33void EmitLoadGlobalU16(EmitContext&) {
34 NotImplemented();
35}
36
37void EmitLoadGlobalS16(EmitContext&) {
38 NotImplemented();
39}
40
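// Global memory accesses go through 64-bit address helpers and therefore need int64
// support; without it the loads yield zero and the stores are dropped with a warning.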
41void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
42 if (ctx.profile.support_int64) {
43 return ctx.AddU32("{}=LoadGlobal32({});", inst, address);
44 }
45 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
46 ctx.AddU32("{}=0u;", inst);
47}
48
49void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
50 if (ctx.profile.support_int64) {
51 return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address);
52 }
53 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
54 ctx.AddU32x2("{}=uvec2(0);", inst);
55}
56
57void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
58 if (ctx.profile.support_int64) {
59 return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address);
60 }
61 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
62 ctx.AddU32x4("{}=uvec4(0);", inst);
63}
64
65void EmitWriteGlobalU8(EmitContext&) {
66 NotImplemented();
67}
68
69void EmitWriteGlobalS8(EmitContext&) {
70 NotImplemented();
71}
72
73void EmitWriteGlobalU16(EmitContext&) {
74 NotImplemented();
75}
76
77void EmitWriteGlobalS16(EmitContext&) {
78 NotImplemented();
79}
80
81void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
82 if (ctx.profile.support_int64) {
83 return ctx.Add("WriteGlobal32({},{});", address, value);
84 }
85 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
86}
87
88void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
89 if (ctx.profile.support_int64) {
90 return ctx.Add("WriteGlobal64({},{});", address, value);
91 }
92 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
93}
94
95void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
96 if (ctx.profile.support_int64) {
97 return ctx.Add("WriteGlobal128({},{});", address, value);
98 }
99 LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
100}
101
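// Sub-word loads fetch the containing 32-bit SSBO word and bitfieldExtract the byte or
// halfword; the signed variants cast the word to int so the extraction sign-extends.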
102void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
103 const IR::Value& offset) {
104 const auto offset_var{ctx.var_alloc.Consume(offset)};
105 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
106 binding.U32(), offset_var, offset_var);
107}
108
109void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
110 const IR::Value& offset) {
111 const auto offset_var{ctx.var_alloc.Consume(offset)};
112 ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
113 binding.U32(), offset_var, offset_var);
114}
115
116void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
117 const IR::Value& offset) {
118 const auto offset_var{ctx.var_alloc.Consume(offset)};
119 ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
120 binding.U32(), offset_var, offset_var);
121}
122
123void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
124 const IR::Value& offset) {
125 const auto offset_var{ctx.var_alloc.Consume(offset)};
126 ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
127 ctx.stage_name, binding.U32(), offset_var, offset_var);
128}
129
130void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
131 const IR::Value& offset) {
132 const auto offset_var{ctx.var_alloc.Consume(offset)};
133 ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
134}
135
136void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
137 const IR::Value& offset) {
138 const auto offset_var{ctx.var_alloc.Consume(offset)};
139 ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
140 binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
141}
142
143void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
144 const IR::Value& offset) {
145 const auto offset_var{ctx.var_alloc.Consume(offset)};
146 ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
147 "+12)>>2]);",
148 inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
149 offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
150 binding.U32(), offset_var);
151}
152
153void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
154 std::string_view value) {
155 const auto offset_var{ctx.var_alloc.Consume(offset)};
156 const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
157 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
158}
159
160void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
161 std::string_view value) {
162 const auto offset_var{ctx.var_alloc.Consume(offset)};
163 const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
164 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
165}
166
167void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
168 std::string_view value) {
169 const auto offset_var{ctx.var_alloc.Consume(offset)};
170 const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
171 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
172}
173
174void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
175 std::string_view value) {
176 const auto offset_var{ctx.var_alloc.Consume(offset)};
177 const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
178 SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
179}
180
181void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
182 std::string_view value) {
183 const auto offset_var{ctx.var_alloc.Consume(offset)};
184 ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
185}
186
187void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
188 std::string_view value) {
189 const auto offset_var{ctx.var_alloc.Consume(offset)};
190 ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
191 ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
192}
193
194void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
195 std::string_view value) {
196 const auto offset_var{ctx.var_alloc.Consume(offset)};
197 ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
198 ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
199 ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
200 ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
201}
202} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
new file mode 100644
index 000000000..f420fe388
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
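// Every function in this file is a stub, so unused-parameter warnings are expected.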
11#ifdef _MSC_VER
12#pragma warning(disable : 4100)
13#endif
14
15namespace Shader::Backend::GLSL {
16
17void EmitGetRegister(EmitContext& ctx) {
18 NotImplemented();
19}
20
21void EmitSetRegister(EmitContext& ctx) {
22 NotImplemented();
23}
24
25void EmitGetPred(EmitContext& ctx) {
26 NotImplemented();
27}
28
29void EmitSetPred(EmitContext& ctx) {
30 NotImplemented();
31}
32
33void EmitSetGotoVariable(EmitContext& ctx) {
34 NotImplemented();
35}
36
37void EmitGetGotoVariable(EmitContext& ctx) {
38 NotImplemented();
39}
40
41void EmitSetIndirectBranchVariable(EmitContext& ctx) {
42 NotImplemented();
43}
44
45void EmitGetIndirectBranchVariable(EmitContext& ctx) {
46 NotImplemented();
47}
48
49void EmitGetZFlag(EmitContext& ctx) {
50 NotImplemented();
51}
52
53void EmitGetSFlag(EmitContext& ctx) {
54 NotImplemented();
55}
56
57void EmitGetCFlag(EmitContext& ctx) {
58 NotImplemented();
59}
60
61void EmitGetOFlag(EmitContext& ctx) {
62 NotImplemented();
63}
64
65void EmitSetZFlag(EmitContext& ctx) {
66 NotImplemented();
67}
68
69void EmitSetSFlag(EmitContext& ctx) {
70 NotImplemented();
71}
72
73void EmitSetCFlag(EmitContext& ctx) {
74 NotImplemented();
75}
76
77void EmitSetOFlag(EmitContext& ctx) {
78 NotImplemented();
79}
80
81void EmitGetZeroFromOp(EmitContext& ctx) {
82 NotImplemented();
83}
84
85void EmitGetSignFromOp(EmitContext& ctx) {
86 NotImplemented();
87}
88
89void EmitGetCarryFromOp(EmitContext& ctx) {
90 NotImplemented();
91}
92
93void EmitGetOverflowFromOp(EmitContext& ctx) {
94 NotImplemented();
95}
96
97void EmitGetSparseFromOp(EmitContext& ctx) {
98 NotImplemented();
99}
100
101void EmitGetInBoundsFromOp(EmitContext& ctx) {
102 NotImplemented();
103}
104
105} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
new file mode 100644
index 000000000..49fba9073
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
13 std::string_view true_value, std::string_view false_value) {
14 ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value);
15}
16
17void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
18 [[maybe_unused]] std::string_view true_value,
19 [[maybe_unused]] std::string_view false_value) {
20 NotImplemented();
21}
22
23void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
24 [[maybe_unused]] std::string_view true_value,
25 [[maybe_unused]] std::string_view false_value) {
26 NotImplemented();
27}
28
29void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
30 std::string_view true_value, std::string_view false_value) {
31 ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value);
32}
33
34void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
35 std::string_view true_value, std::string_view false_value) {
36 ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value);
37}
38
39void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
40 [[maybe_unused]] std::string_view true_value,
41 [[maybe_unused]] std::string_view false_value) {
42 NotImplemented();
43}
44
45void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
46 std::string_view true_value, std::string_view false_value) {
47 ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value);
48}
49
50void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
51 std::string_view true_value, std::string_view false_value) {
52 ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value);
53}
54
55} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
new file mode 100644
index 000000000..518b78f06
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -0,0 +1,79 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::Backend::GLSL {
12namespace {
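// Like the SSBO path, sub-word shared memory stores use an atomicCompSwap loop over the
// containing 32-bit word of the 'smem' array.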
13constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
14 "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
15 "if(cas_result==old_value){{break;}}}}"};
16
17void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value,
18 std::string_view bit_offset, u32 num_bits) {
19 const auto smem{fmt::format("smem[{}>>2]", offset)};
20 ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits);
21}
22} // Anonymous namespace
23
24void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
25 ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
26}
27
28void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
29 ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
30}
31
32void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
33 ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
34}
35
36void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
37 ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
38}
39
40void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
41 ctx.AddU32("{}=smem[{}>>2];", inst, offset);
42}
43
44void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
45 ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
46}
47
48void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
49 ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
50 offset, offset, offset, offset);
51}
52
53void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) {
54 const auto bit_offset{fmt::format("int({}%4)*8", offset)};
55 SharedWriteCas(ctx, offset, value, bit_offset, 8);
56}
57
58void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) {
59 const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)};
60 SharedWriteCas(ctx, offset, value, bit_offset, 16);
61}
62
63void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) {
64 ctx.Add("smem[{}>>2]={};", offset, value);
65}
66
67void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) {
68 ctx.Add("smem[{}>>2]={}.x;", offset, value);
69 ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
70}
71
72void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) {
73 ctx.Add("smem[{}>>2]={}.x;", offset, value);
74 ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
75 ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
76 ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
77}
78
79} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
new file mode 100644
index 000000000..9b866f889
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -0,0 +1,111 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/profile.h"
12
13namespace Shader::Backend::GLSL {
14namespace {
15std::string_view OutputVertexIndex(EmitContext& ctx) {
16 return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
17}
18
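// Gives every output attribute the shader writes a defined default (zeros with w=1), so
// partially written varyings do not pass undefined components to later stages.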
19void InitializeOutputVaryings(EmitContext& ctx) {
20 if (ctx.uses_geometry_passthrough) {
21 return;
22 }
23 if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
24 ctx.Add("gl_Position=vec4(0,0,0,1);");
25 }
26 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
27 if (!ctx.info.stores.Generic(index)) {
28 continue;
29 }
30 const auto& info_array{ctx.output_generics.at(index)};
31 const auto output_decorator{OutputVertexIndex(ctx)};
32 size_t element{};
33 while (element < info_array.size()) {
34 const auto& info{info_array.at(element)};
35 const auto varying_name{fmt::format("{}{}", info.name, output_decorator)};
36 switch (info.num_components) {
37 case 1: {
38 const char value{element == 3 ? '1' : '0'};
39 ctx.Add("{}={}.f;", varying_name, value);
40 break;
41 }
42 case 2:
43 case 3:
44 if (element + info.num_components < 4) {
45 ctx.Add("{}=vec{}(0);", varying_name, info.num_components);
46 } else {
47                    // The last element is the w component, which must be initialized to 1
48 const auto zeros{info.num_components == 3 ? "0,0," : "0,"};
49 ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros);
50 }
51 break;
52 case 4:
53 ctx.Add("{}=vec4(0,0,0,1);", varying_name);
54 break;
55 default:
56 break;
57 }
58 element += info.num_components;
59 }
60 }
61}
62} // Anonymous namespace
63
64void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
65 const size_t num_args{phi.NumArgs()};
66 for (size_t i = 0; i < num_args; ++i) {
67 ctx.var_alloc.Consume(phi.Arg(i));
68 }
69 if (!phi.Definition<Id>().is_valid) {
70 // The phi node wasn't forward defined
71 ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type());
72 }
73}
74
75void EmitVoid(EmitContext&) {}
76
77void EmitReference(EmitContext& ctx, const IR::Value& value) {
78 ctx.var_alloc.Consume(value);
79}
80
81void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
82 IR::Inst& phi{*phi_value.InstRecursive()};
83 const auto phi_type{phi.Arg(0).Type()};
84 if (!phi.Definition<Id>().is_valid) {
85 // The phi node wasn't forward defined
86 ctx.var_alloc.PhiDefine(phi, phi_type);
87 }
88 const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})};
89 const auto val_reg{ctx.var_alloc.Consume(value)};
90 if (phi_reg == val_reg) {
91 return;
92 }
93 ctx.Add("{}={};", phi_reg, val_reg);
94}
95
96void EmitPrologue(EmitContext& ctx) {
97 InitializeOutputVaryings(ctx);
98}
99
100void EmitEpilogue(EmitContext&) {}
101
102void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
103 ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
104 InitializeOutputVaryings(ctx);
105}
106
107void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
108 ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream));
109}
110
111} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
new file mode 100644
index 000000000..15bf02dd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9
10namespace Shader::Backend::GLSL {
11
12void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
13 ctx.AddU1("{}=false;", inst);
14}
15
16void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
17 ctx.AddU32("{}=0u;", inst);
18}
19
20void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
21 ctx.AddU32("{}=0u;", inst);
22}
23
24void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
25 ctx.AddU32("{}=0u;", inst);
26}
27
28void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
29 ctx.AddU64("{}=0u;", inst);
30}
31
32} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
new file mode 100644
index 000000000..a982dd8a2
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -0,0 +1,217 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
14void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
15 IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
16 if (!in_bounds) {
17 return;
18 }
19 ctx.AddU1("{}=shfl_in_bounds;", *in_bounds);
20 in_bounds->Invalidate();
21}
22
23std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) {
24 return fmt::format("({}&{})", thread_id, segmentation_mask);
25}
26
27std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp,
28 std::string_view not_seg_mask) {
29 return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask);
30}
31
32std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp,
33 std::string_view segmentation_mask) {
34 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
35 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
36 return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask);
37}
38
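// NV_shader_thread_shuffle path: the shuffle width is derived from the segmentation mask
// and the intrinsic's 'valid' output is captured in shfl_in_bounds.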
39void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
40 std::string_view value, std::string_view index,
41 [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) {
42 const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)};
43 ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
44 SetInBoundsFlag(ctx, inst);
45}
46} // Anonymous namespace
47
48void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
49 ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
50}
51
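// When the host subgroup can be wider than the guest's 32-thread warp, votes are reduced
// over the ballot of currently active invocations instead of the whole subgroup.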
52void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
53 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
54 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
55 } else {
56 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
57 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
58 ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
59 }
60}
61
62void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
63 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
64 ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
65 } else {
66 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
67 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
68 ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
69 }
70}
71
72void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
73 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
74 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
75 } else {
76 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
77 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
78 const auto value{fmt::format("({}^{})", ballot, active_mask)};
79 ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
80 }
81}
82
83void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
84 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
85 ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
86 } else {
87 ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
88 }
89}
90
91void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
92 ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
93}
94
95void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
96 ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
97}
98
99void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
100 ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
101}
102
103void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
104 ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
105}
106
107void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
108 ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
109}
110
111void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
112 std::string_view index, std::string_view clamp,
113 std::string_view segmentation_mask) {
114 if (ctx.profile.support_gl_warp_intrinsics) {
115 UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
116 return;
117 }
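    // Generic path: build the source lane from the index and the segment that contains this
    // invocation, and only read it when it does not exceed the clamped maximum lane.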
118 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
119 const auto thread_id{"gl_SubGroupInvocationARB"};
120 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
121 const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
122
123 const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
124 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
125 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
126 SetInBoundsFlag(ctx, inst);
127 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
128}
129
130void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
131 std::string_view clamp, std::string_view segmentation_mask) {
132 if (ctx.profile.support_gl_warp_intrinsics) {
133 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
134 return;
135 }
136 const auto thread_id{"gl_SubGroupInvocationARB"};
137 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
138 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
139 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
140 SetInBoundsFlag(ctx, inst);
141 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
142}
143
144void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
145 std::string_view index, std::string_view clamp,
146 std::string_view segmentation_mask) {
147 if (ctx.profile.support_gl_warp_intrinsics) {
148 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
149 return;
150 }
151 const auto thread_id{"gl_SubGroupInvocationARB"};
152 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
153 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
154 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
155 SetInBoundsFlag(ctx, inst);
156 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
157}
158
159void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
160 std::string_view index, std::string_view clamp,
161 std::string_view segmentation_mask) {
162 if (ctx.profile.support_gl_warp_intrinsics) {
163 UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
164 return;
165 }
166 const auto thread_id{"gl_SubGroupInvocationARB"};
167 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
168 const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
169 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
170 SetInBoundsFlag(ctx, inst);
171 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
172}
173
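// Each invocation picks its 2-bit selector from 'swizzle' and uses it to index the
// FSWZ_A/FSWZ_B lookup tables that weight the two operands.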
174void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
175 std::string_view swizzle) {
176 const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)};
177 const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask);
178 const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask);
179 ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b);
180}
181
182void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
183 if (ctx.profile.support_gl_derivative_control) {
184 ctx.AddF32("{}=dFdxFine({});", inst, op_a);
185 } else {
186        LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, falling back to dFdx");
187 ctx.AddF32("{}=dFdx({});", inst, op_a);
188 }
189}
190
191void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
192 if (ctx.profile.support_gl_derivative_control) {
193 ctx.AddF32("{}=dFdyFine({});", inst, op_a);
194 } else {
195        LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, falling back to dFdy");
196 ctx.AddF32("{}=dFdy({});", inst, op_a);
197 }
198}
199
200void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
201 if (ctx.profile.support_gl_derivative_control) {
202 ctx.AddF32("{}=dFdxCoarse({});", inst, op_a);
203 } else {
204        LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, falling back to dFdx");
205 ctx.AddF32("{}=dFdx({});", inst, op_a);
206 }
207}
208
209void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
210 if (ctx.profile.support_gl_derivative_control) {
211 ctx.AddF32("{}=dFdyCoarse({});", inst, op_a);
212 } else {
213        LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, falling back to dFdy");
214 ctx.AddF32("{}=dFdy({});", inst, op_a);
215 }
216}
217} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp
new file mode 100644
index 000000000..194f926ca
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp
@@ -0,0 +1,308 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <string_view>
7
8#include <fmt/format.h>
9
10#include "shader_recompiler/backend/glsl/var_alloc.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::Backend::GLSL {
15namespace {
16std::string TypePrefix(GlslVarType type) {
17 switch (type) {
18 case GlslVarType::U1:
19 return "b_";
20 case GlslVarType::F16x2:
21 return "f16x2_";
22 case GlslVarType::U32:
23 return "u_";
24 case GlslVarType::F32:
25 return "f_";
26 case GlslVarType::U64:
27 return "u64_";
28 case GlslVarType::F64:
29 return "d_";
30 case GlslVarType::U32x2:
31 return "u2_";
32 case GlslVarType::F32x2:
33 return "f2_";
34 case GlslVarType::U32x3:
35 return "u3_";
36 case GlslVarType::F32x3:
37 return "f3_";
38 case GlslVarType::U32x4:
39 return "u4_";
40 case GlslVarType::F32x4:
41 return "f4_";
42 case GlslVarType::PrecF32:
43 return "pf_";
44 case GlslVarType::PrecF64:
45 return "pd_";
46 case GlslVarType::Void:
47 return "";
48 default:
49 throw NotImplementedException("Type {}", type);
50 }
51}
52
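// Turns an fmt-printed value into a valid GLSL literal: NaN and the infinities have no
// literal form and are bit-cast through utof(), scientific notation is wrapped in a
// constructor cast, and plain numbers get a trailing '.' and an 'f'/'lf' suffix.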
53std::string FormatFloat(std::string_view value, IR::Type type) {
54 // TODO: Confirm FP64 nan/inf
55 if (type == IR::Type::F32) {
56 if (value == "nan") {
57 return "utof(0x7fc00000)";
58 }
59 if (value == "inf") {
60 return "utof(0x7f800000)";
61 }
62 if (value == "-inf") {
63 return "utof(0xff800000)";
64 }
65 }
66 if (value.find_first_of('e') != std::string_view::npos) {
67 // scientific notation
68 const auto cast{type == IR::Type::F32 ? "float" : "double"};
69 return fmt::format("{}({})", cast, value);
70 }
71 const bool needs_dot{value.find_first_of('.') == std::string_view::npos};
72 const bool needs_suffix{!value.ends_with('f')};
73 const auto suffix{type == IR::Type::F32 ? "f" : "lf"};
74 return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : "");
75}
76
77std::string MakeImm(const IR::Value& value) {
78 switch (value.Type()) {
79 case IR::Type::U1:
80 return fmt::format("{}", value.U1() ? "true" : "false");
81 case IR::Type::U32:
82 return fmt::format("{}u", value.U32());
83 case IR::Type::F32:
84 return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32);
85 case IR::Type::U64:
86 return fmt::format("{}ul", value.U64());
87 case IR::Type::F64:
88 return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64);
89 case IR::Type::Void:
90 return "";
91 default:
92 throw NotImplementedException("Immediate type {}", value.Type());
93 }
94}
95} // Anonymous namespace
96
97std::string VarAlloc::Representation(u32 index, GlslVarType type) const {
98 const auto prefix{TypePrefix(type)};
99 return fmt::format("{}{}", prefix, index);
100}
101
102std::string VarAlloc::Representation(Id id) const {
103 return Representation(id.index, id.type);
104}
105
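// Instructions whose result is never used are given a throwaway 't'-prefixed temporary
// instead of a tracked variable slot; the tracker only records that a temporary exists.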
106std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) {
107 if (inst.HasUses()) {
108 inst.SetDefinition<Id>(Alloc(type));
109 return Representation(inst.Definition<Id>());
110 } else {
111 Id id{};
112 id.type.Assign(type);
113 GetUseTracker(type).uses_temp = true;
114 inst.SetDefinition<Id>(id);
115 return 't' + Representation(inst.Definition<Id>());
116 }
117}
118
119std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) {
120 return Define(inst, RegType(type));
121}
122
123std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) {
124 return AddDefine(inst, RegType(type));
125}
126
127std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) {
128 if (inst.HasUses()) {
129 inst.SetDefinition<Id>(Alloc(type));
130 return Representation(inst.Definition<Id>());
131 } else {
132 return "";
133 }
135}
136
137std::string VarAlloc::Consume(const IR::Value& value) {
138 return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
139}
140
141std::string VarAlloc::ConsumeInst(IR::Inst& inst) {
142 inst.DestructiveRemoveUsage();
143 if (!inst.HasUses()) {
144 Free(inst.Definition<Id>());
145 }
146 return Representation(inst.Definition<Id>());
147}
148
149std::string VarAlloc::GetGlslType(IR::Type type) const {
150 return GetGlslType(RegType(type));
151}
152
153Id VarAlloc::Alloc(GlslVarType type) {
154 auto& use_tracker{GetUseTracker(type)};
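    // Reuse a previously freed slot of the same type before growing the pool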
155 const auto num_vars{use_tracker.var_use.size()};
156 for (size_t var = 0; var < num_vars; ++var) {
157 if (use_tracker.var_use[var]) {
158 continue;
159 }
160 use_tracker.num_used = std::max(use_tracker.num_used, var + 1);
161 use_tracker.var_use[var] = true;
162 Id ret{};
163 ret.is_valid.Assign(1);
164 ret.type.Assign(type);
165 ret.index.Assign(static_cast<u32>(var));
166 return ret;
167 }
168 // Allocate a new variable
169 use_tracker.var_use.push_back(true);
170 Id ret{};
171 ret.is_valid.Assign(1);
172 ret.type.Assign(type);
173 ret.index.Assign(static_cast<u32>(use_tracker.num_used));
174 ++use_tracker.num_used;
175 return ret;
176}
177
178void VarAlloc::Free(Id id) {
179 if (id.is_valid == 0) {
180 throw LogicError("Freeing invalid variable");
181 }
182 auto& use_tracker{GetUseTracker(id.type)};
183 use_tracker.var_use[id.index] = false;
184}
185
186GlslVarType VarAlloc::RegType(IR::Type type) const {
187 switch (type) {
188 case IR::Type::U1:
189 return GlslVarType::U1;
190 case IR::Type::U32:
191 return GlslVarType::U32;
192 case IR::Type::F32:
193 return GlslVarType::F32;
194 case IR::Type::U64:
195 return GlslVarType::U64;
196 case IR::Type::F64:
197 return GlslVarType::F64;
198 default:
199 throw NotImplementedException("IR type {}", type);
200 }
201}
202
203std::string VarAlloc::GetGlslType(GlslVarType type) const {
204 switch (type) {
205 case GlslVarType::U1:
206 return "bool";
207 case GlslVarType::F16x2:
208 return "f16vec2";
209 case GlslVarType::U32:
210 return "uint";
211 case GlslVarType::F32:
212 case GlslVarType::PrecF32:
213 return "float";
214 case GlslVarType::U64:
215 return "uint64_t";
216 case GlslVarType::F64:
217 case GlslVarType::PrecF64:
218 return "double";
219 case GlslVarType::U32x2:
220 return "uvec2";
221 case GlslVarType::F32x2:
222 return "vec2";
223 case GlslVarType::U32x3:
224 return "uvec3";
225 case GlslVarType::F32x3:
226 return "vec3";
227 case GlslVarType::U32x4:
228 return "uvec4";
229 case GlslVarType::F32x4:
230 return "vec4";
231 case GlslVarType::Void:
232 return "";
233 default:
234 throw NotImplementedException("Type {}", type);
235 }
236}
237
238VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) {
239 switch (type) {
240 case GlslVarType::U1:
241 return var_bool;
242 case GlslVarType::F16x2:
243 return var_f16x2;
244 case GlslVarType::U32:
245 return var_u32;
246 case GlslVarType::F32:
247 return var_f32;
248 case GlslVarType::U64:
249 return var_u64;
250 case GlslVarType::F64:
251 return var_f64;
252 case GlslVarType::U32x2:
253 return var_u32x2;
254 case GlslVarType::F32x2:
255 return var_f32x2;
256 case GlslVarType::U32x3:
257 return var_u32x3;
258 case GlslVarType::F32x3:
259 return var_f32x3;
260 case GlslVarType::U32x4:
261 return var_u32x4;
262 case GlslVarType::F32x4:
263 return var_f32x4;
264 case GlslVarType::PrecF32:
265 return var_precf32;
266 case GlslVarType::PrecF64:
267 return var_precf64;
268 default:
269 throw NotImplementedException("Type {}", type);
270 }
271}
272
273const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const {
274 switch (type) {
275 case GlslVarType::U1:
276 return var_bool;
277 case GlslVarType::F16x2:
278 return var_f16x2;
279 case GlslVarType::U32:
280 return var_u32;
281 case GlslVarType::F32:
282 return var_f32;
283 case GlslVarType::U64:
284 return var_u64;
285 case GlslVarType::F64:
286 return var_f64;
287 case GlslVarType::U32x2:
288 return var_u32x2;
289 case GlslVarType::F32x2:
290 return var_f32x2;
291 case GlslVarType::U32x3:
292 return var_u32x3;
293 case GlslVarType::F32x3:
294 return var_f32x3;
295 case GlslVarType::U32x4:
296 return var_u32x4;
297 case GlslVarType::F32x4:
298 return var_f32x4;
299 case GlslVarType::PrecF32:
300 return var_precf32;
301 case GlslVarType::PrecF64:
302 return var_precf64;
303 default:
304 throw NotImplementedException("Type {}", type);
305 }
306}
307
308} // namespace Shader::Backend::GLSL
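Note: Alloc above hands out one numbered slot per GLSL type and reuses a slot as soon as ConsumeInst frees it, so the emitted GLSL declares only as many variables as are ever live at once. A minimal standalone sketch of that slot-reuse strategy follows; the function and parameter names are illustrative only and are not part of the emitter's API.

#include <algorithm>
#include <cstddef>
#include <vector>

// Simplified model of VarAlloc::Alloc: reuse the first free slot, otherwise grow the pool.
std::size_t AllocSlot(std::vector<bool>& var_use, std::size_t& num_used) {
    for (std::size_t var = 0; var < var_use.size(); ++var) {
        if (!var_use[var]) {
            var_use[var] = true;                    // mark the recycled slot as live again
            num_used = std::max(num_used, var + 1); // track the high-water mark for declarations
            return var;
        }
    }
    var_use.push_back(true);                        // no free slot: append a new one
    ++num_used;
    return var_use.size() - 1;
}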
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h
new file mode 100644
index 000000000..8b49f32a6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.h
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <string>
9#include <vector>
10
11#include "common/bit_field.h"
12#include "common/common_types.h"
13
14namespace Shader::IR {
15class Inst;
16class Value;
17enum class Type;
18} // namespace Shader::IR
19
20namespace Shader::Backend::GLSL {
21enum class GlslVarType : u32 {
22 U1,
23 F16x2,
24 U32,
25 F32,
26 U64,
27 F64,
28 U32x2,
29 F32x2,
30 U32x3,
31 F32x3,
32 U32x4,
33 F32x4,
34 PrecF32,
35 PrecF64,
36 Void,
37};
38
39struct Id {
40 union {
41 u32 raw;
42 BitField<0, 1, u32> is_valid;
43 BitField<1, 4, GlslVarType> type;
44 BitField<6, 26, u32> index;
45 };
46
47 bool operator==(Id rhs) const noexcept {
48 return raw == rhs.raw;
49 }
50 bool operator!=(Id rhs) const noexcept {
51 return !operator==(rhs);
52 }
53};
54static_assert(sizeof(Id) == sizeof(u32));
55
56class VarAlloc {
57public:
58 struct UseTracker {
59 bool uses_temp{};
60 size_t num_used{};
61 std::vector<bool> var_use;
62 };
63
64 /// Defines a variable for explicit usages; may fall back to an inline temporary when the result has no uses
65 std::string Define(IR::Inst& inst, GlslVarType type);
66 std::string Define(IR::Inst& inst, IR::Type type);
67
68 /// Defines a variable for assignments driven by the IR (e.g. phi nodes). Returns a blank
69 /// string if the instruction's result is unused in the IR.
70 std::string AddDefine(IR::Inst& inst, GlslVarType type);
71 std::string PhiDefine(IR::Inst& inst, IR::Type type);
72
73 std::string Consume(const IR::Value& value);
74 std::string ConsumeInst(IR::Inst& inst);
75
76 std::string GetGlslType(GlslVarType type) const;
77 std::string GetGlslType(IR::Type type) const;
78
79 const UseTracker& GetUseTracker(GlslVarType type) const;
80 std::string Representation(u32 index, GlslVarType type) const;
81
82private:
83 GlslVarType RegType(IR::Type type) const;
84 Id Alloc(GlslVarType type);
85 void Free(Id id);
86 UseTracker& GetUseTracker(GlslVarType type);
87 std::string Representation(Id id) const;
88
89 UseTracker var_bool{};
90 UseTracker var_f16x2{};
91 UseTracker var_u32{};
92 UseTracker var_u32x2{};
93 UseTracker var_u32x3{};
94 UseTracker var_u32x4{};
95 UseTracker var_f32{};
96 UseTracker var_f32x2{};
97 UseTracker var_f32x3{};
98 UseTracker var_f32x4{};
99 UseTracker var_u64{};
100 UseTracker var_f64{};
101 UseTracker var_precf32{};
102 UseTracker var_precf64{};
103};
104
105} // namespace Shader::Backend::GLSL
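Note: Id packs everything VarAlloc needs to recall a definition into a single u32 via BitField: bit 0 holds is_valid, four bits starting at bit 1 hold the GlslVarType, and 26 bits starting at bit 6 hold the slot index. A short usage sketch under that layout; the concrete values are illustrative only.

// Hypothetical example of building an Id by hand (normally done inside VarAlloc::Alloc).
Id id{};
id.is_valid.Assign(1);
id.type.Assign(GlslVarType::F32);
id.index.Assign(42u);
// id.raw now encodes all three fields; two Ids compare equal exactly when their raw words
// match, which is what operator== above relies on.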
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
new file mode 100644
index 000000000..2d29d8c14
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -0,0 +1,1368 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <climits>
8#include <string_view>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "common/div_ceil.h"
14#include "shader_recompiler/backend/spirv/emit_context.h"
15
16namespace Shader::Backend::SPIRV {
17namespace {
18enum class Operation {
19 Increment,
20 Decrement,
21 FPAdd,
22 FPMin,
23 FPMax,
24};
25
26struct AttrInfo {
27 Id pointer;
28 Id id;
29 bool needs_cast;
30};
31
32Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
33 const spv::ImageFormat format{spv::ImageFormat::Unknown};
34 const Id type{ctx.F32[1]};
35 const bool depth{desc.is_depth};
36 switch (desc.type) {
37 case TextureType::Color1D:
38 return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
39 case TextureType::ColorArray1D:
40 return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
41 case TextureType::Color2D:
42 return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format);
43 case TextureType::ColorArray2D:
44 return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format);
45 case TextureType::Color3D:
46 return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
47 case TextureType::ColorCube:
48 return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
49 case TextureType::ColorArrayCube:
50 return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
51 case TextureType::Buffer:
52 break;
53 }
54 throw InvalidArgument("Invalid texture type {}", desc.type);
55}
56
57spv::ImageFormat GetImageFormat(ImageFormat format) {
58 switch (format) {
59 case ImageFormat::Typeless:
60 return spv::ImageFormat::Unknown;
61 case ImageFormat::R8_UINT:
62 return spv::ImageFormat::R8ui;
63 case ImageFormat::R8_SINT:
64 return spv::ImageFormat::R8i;
65 case ImageFormat::R16_UINT:
66 return spv::ImageFormat::R16ui;
67 case ImageFormat::R16_SINT:
68 return spv::ImageFormat::R16i;
69 case ImageFormat::R32_UINT:
70 return spv::ImageFormat::R32ui;
71 case ImageFormat::R32G32_UINT:
72 return spv::ImageFormat::Rg32ui;
73 case ImageFormat::R32G32B32A32_UINT:
74 return spv::ImageFormat::Rgba32ui;
75 }
76 throw InvalidArgument("Invalid image format {}", format);
77}
78
79Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
80 const spv::ImageFormat format{GetImageFormat(desc.format)};
81 const Id type{ctx.U32[1]};
82 switch (desc.type) {
83 case TextureType::Color1D:
84 return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format);
85 case TextureType::ColorArray1D:
86 return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format);
87 case TextureType::Color2D:
88 return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format);
89 case TextureType::ColorArray2D:
90 return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format);
91 case TextureType::Color3D:
92 return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format);
93 case TextureType::Buffer:
94 throw NotImplementedException("Image buffer");
95 default:
96 break;
97 }
98 throw InvalidArgument("Invalid texture type {}", desc.type);
99}
100
101Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin,
102 spv::StorageClass storage_class) {
103 const Id pointer_type{ctx.TypePointer(storage_class, type)};
104 const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)};
105 if (builtin) {
106 ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin);
107 }
108 ctx.interfaces.push_back(id);
109 return id;
110}
111
112u32 NumVertices(InputTopology input_topology) {
113 switch (input_topology) {
114 case InputTopology::Points:
115 return 1;
116 case InputTopology::Lines:
117 return 2;
118 case InputTopology::LinesAdjacency:
119 return 4;
120 case InputTopology::Triangles:
121 return 3;
122 case InputTopology::TrianglesAdjacency:
123 return 6;
124 }
125 throw InvalidArgument("Invalid input topology {}", input_topology);
126}
127
128Id DefineInput(EmitContext& ctx, Id type, bool per_invocation,
129 std::optional<spv::BuiltIn> builtin = std::nullopt) {
130 switch (ctx.stage) {
131 case Stage::TessellationControl:
132 case Stage::TessellationEval:
133 if (per_invocation) {
134 type = ctx.TypeArray(type, ctx.Const(32u));
135 }
136 break;
137 case Stage::Geometry:
138 if (per_invocation) {
139 const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)};
140 type = ctx.TypeArray(type, ctx.Const(num_vertices));
141 }
142 break;
143 default:
144 break;
145 }
146 return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);
147}
148
149Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations,
150 std::optional<spv::BuiltIn> builtin = std::nullopt) {
151 if (invocations && ctx.stage == Stage::TessellationControl) {
152 type = ctx.TypeArray(type, ctx.Const(*invocations));
153 }
154 return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
155}
156
157void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) {
158 static constexpr std::string_view swizzle{"xyzw"};
159 const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
160 u32 element{0};
161 while (element < 4) {
162 const u32 remainder{4 - element};
163 const TransformFeedbackVarying* xfb_varying{};
164 if (!ctx.runtime_info.xfb_varyings.empty()) {
165 xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element];
166 xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
167 }
168 const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
169
170 const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)};
171 ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
172 if (element > 0) {
173 ctx.Decorate(id, spv::Decoration::Component, element);
174 }
175 if (xfb_varying) {
176 ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer);
177 ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride);
178 ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
179 }
180 if (num_components < 4 || element > 0) {
181 const std::string_view subswizzle{swizzle.substr(element, num_components)};
182 ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle));
183 } else {
184 ctx.Name(id, fmt::format("out_attr{}", index));
185 }
186 const GenericElementInfo info{
187 .id = id,
188 .first_element = element,
189 .num_components = num_components,
190 };
191 std::fill_n(ctx.output_generics[index].begin() + element, num_components, info);
192 element += num_components;
193 }
194}
195
196Id GetAttributeType(EmitContext& ctx, AttributeType type) {
197 switch (type) {
198 case AttributeType::Float:
199 return ctx.F32[4];
200 case AttributeType::SignedInt:
201 return ctx.TypeVector(ctx.TypeInt(32, true), 4);
202 case AttributeType::UnsignedInt:
203 return ctx.U32[4];
204 case AttributeType::Disabled:
205 break;
206 }
207 throw InvalidArgument("Invalid attribute type {}", type);
208}
209
210std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
211 const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
212 switch (type) {
213 case AttributeType::Float:
214 return AttrInfo{ctx.input_f32, ctx.F32[1], false};
215 case AttributeType::UnsignedInt:
216 return AttrInfo{ctx.input_u32, ctx.U32[1], true};
217 case AttributeType::SignedInt:
218 return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
219 case AttributeType::Disabled:
220 return std::nullopt;
221 }
222 throw InvalidArgument("Invalid attribute type {}", type);
223}
224
225std::string_view StageName(Stage stage) {
226 switch (stage) {
227 case Stage::VertexA:
228 return "vs_a";
229 case Stage::VertexB:
230 return "vs";
231 case Stage::TessellationControl:
232 return "tcs";
233 case Stage::TessellationEval:
234 return "tes";
235 case Stage::Geometry:
236 return "gs";
237 case Stage::Fragment:
238 return "fs";
239 case Stage::Compute:
240 return "cs";
241 }
242 throw InvalidArgument("Invalid stage {}", stage);
243}
244
245template <typename... Args>
246void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
247 ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
248 std::forward<Args>(args)...)
249 .c_str());
250}
251
252void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type,
253 u32 binding, Id type, char type_char, u32 element_size) {
254 const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))};
255 ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size);
256
257 const Id struct_type{ctx.TypeStruct(array_type)};
258 Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT);
259 ctx.Decorate(struct_type, spv::Decoration::Block);
260 ctx.MemberName(struct_type, 0, "data");
261 ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
262
263 const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)};
264 const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)};
265 ctx.uniform_types.*member_type = uniform_type;
266
267 for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
268 const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)};
269 ctx.Decorate(id, spv::Decoration::Binding, binding);
270 ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
271 ctx.Name(id, fmt::format("c{}", desc.index));
272 for (size_t i = 0; i < desc.count; ++i) {
273 ctx.cbufs[desc.index + i].*member_type = id;
274 }
275 if (ctx.profile.supported_spirv >= 0x00010400) {
276 ctx.interfaces.push_back(id);
277 }
278 binding += desc.count;
279 }
280}
281
282void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
283 Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type,
284 u32 stride) {
285 const Id array_type{ctx.TypeRuntimeArray(type)};
286 ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride);
287
288 const Id struct_type{ctx.TypeStruct(array_type)};
289 ctx.Decorate(struct_type, spv::Decoration::Block);
290 ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
291
292 const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
293 type_def.array = struct_pointer;
294 type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type);
295
296 u32 index{};
297 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
298 const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)};
299 ctx.Decorate(id, spv::Decoration::Binding, binding);
300 ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
301 ctx.Name(id, fmt::format("ssbo{}", index));
302 if (ctx.profile.supported_spirv >= 0x00010400) {
303 ctx.interfaces.push_back(id);
304 }
305 for (size_t i = 0; i < desc.count; ++i) {
306 ctx.ssbos[index + i].*member_type = id;
307 }
308 index += desc.count;
309 binding += desc.count;
310 }
311}
312
313Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) {
314 const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)};
315 const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
316 const Id op_a{ctx.OpFunctionParameter(value_type)};
317 const Id op_b{ctx.OpFunctionParameter(value_type)};
318 ctx.AddLabel();
319 Id result{};
320 switch (operation) {
321 case Operation::Increment: {
322 const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)};
323 const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))};
324 result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr);
325 break;
326 }
327 case Operation::Decrement: {
328 const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))};
329 const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)};
330 const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)};
331 const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))};
332 result = ctx.OpSelect(value_type, pred, op_b, decr);
333 break;
334 }
335 case Operation::FPAdd:
336 result = ctx.OpFAdd(value_type, op_a, op_b);
337 break;
338 case Operation::FPMin:
339 result = ctx.OpFMin(value_type, op_a, op_b);
340 break;
341 case Operation::FPMax:
342 result = ctx.OpFMax(value_type, op_a, op_b);
343 break;
344 default:
345 break;
346 }
347 ctx.OpReturnValue(result);
348 ctx.OpFunctionEnd();
349 return func;
350}
351
352Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer,
353 Id value_type, Id memory_type, spv::Scope scope) {
354 const bool is_shared{scope == spv::Scope::Workgroup};
355 const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout};
356 const Id cas_func{CasFunction(ctx, operation, value_type)};
357 const Id zero{ctx.u32_zero_value};
358 const Id scope_id{ctx.Const(static_cast<u32>(scope))};
359
360 const Id loop_header{ctx.OpLabel()};
361 const Id continue_block{ctx.OpLabel()};
362 const Id merge_block{ctx.OpLabel()};
363 const Id func_type{is_shared
364 ? ctx.TypeFunction(value_type, ctx.U32[1], value_type)
365 : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)};
366
367 const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
368 const Id index{ctx.OpFunctionParameter(ctx.U32[1])};
369 const Id op_b{ctx.OpFunctionParameter(value_type)};
370 const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)};
371 ctx.AddLabel();
372 ctx.OpBranch(loop_header);
373 ctx.AddLabel(loop_header);
374
375 ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
376 ctx.OpBranch(continue_block);
377
378 ctx.AddLabel(continue_block);
379 const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index)
380 : ctx.OpAccessChain(element_pointer, base, index)};
381 if (value_type.value == ctx.F32[2].value) {
382 const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)};
383 const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)};
384 const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)};
385 const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)};
386 const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
387 zero, u32_new_value, u32_value)};
388 const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)};
389 ctx.OpBranchConditional(success, merge_block, loop_header);
390
391 ctx.AddLabel(merge_block);
392 ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res));
393 } else {
394 const Id value{ctx.OpLoad(memory_type, word_pointer)};
395 const bool matching_type{value_type.value == memory_type.value};
396 const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)};
397 const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)};
398 const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)};
399 const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
400 zero, new_value, value)};
401 const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)};
402 ctx.OpBranchConditional(success, merge_block, loop_header);
403
404 ctx.AddLabel(merge_block);
405 ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res));
406 }
407 ctx.OpFunctionEnd();
408 return func;
409}
410
411template <typename Desc>
412std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) {
413 if (desc.count > 1) {
414 return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index,
415 desc.cbuf_offset, desc.count);
416 } else {
417 return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index,
418 desc.cbuf_offset);
419 }
420}
421
422Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
423 if (count > 1) {
424 const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))};
425 return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type);
426 } else {
427 return pointer_type;
428 }
429}
430} // Anonymous namespace
431
432void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
433 defs[0] = sirit_ctx.Name(base_type, name);
434
435 std::array<char, 6> def_name;
436 for (int i = 1; i < 4; ++i) {
437 const std::string_view def_name_view(
438 def_name.data(),
439 fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
440 defs[static_cast<size_t>(i)] =
441 sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
442 }
443}
444
445EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
446 IR::Program& program, Bindings& bindings)
447 : Sirit::Module(profile_.supported_spirv), profile{profile_},
448 runtime_info{runtime_info_}, stage{program.stage} {
449 const bool is_unified{profile.unified_descriptor_binding};
450 u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
451 u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
452 u32& texture_binding{is_unified ? bindings.unified : bindings.texture};
453 u32& image_binding{is_unified ? bindings.unified : bindings.image};
454 AddCapability(spv::Capability::Shader);
455 DefineCommonTypes(program.info);
456 DefineCommonConstants();
457 DefineInterfaces(program);
458 DefineLocalMemory(program);
459 DefineSharedMemory(program);
460 DefineSharedMemoryFunctions(program);
461 DefineConstantBuffers(program.info, uniform_binding);
462 DefineStorageBuffers(program.info, storage_binding);
463 DefineTextureBuffers(program.info, texture_binding);
464 DefineImageBuffers(program.info, image_binding);
465 DefineTextures(program.info, texture_binding);
466 DefineImages(program.info, image_binding);
467 DefineAttributeMemAccess(program.info);
468 DefineGlobalMemoryFunctions(program.info);
469}
470
471EmitContext::~EmitContext() = default;
472
473Id EmitContext::Def(const IR::Value& value) {
474 if (!value.IsImmediate()) {
475 return value.InstRecursive()->Definition<Id>();
476 }
477 switch (value.Type()) {
478 case IR::Type::Void:
479 // Void instructions are used for optional arguments (e.g. texture offsets)
480 // They are not meant to be used in the SPIR-V module
481 return Id{};
482 case IR::Type::U1:
483 return value.U1() ? true_value : false_value;
484 case IR::Type::U32:
485 return Const(value.U32());
486 case IR::Type::U64:
487 return Constant(U64, value.U64());
488 case IR::Type::F32:
489 return Const(value.F32());
490 case IR::Type::F64:
491 return Constant(F64[1], value.F64());
492 default:
493 throw NotImplementedException("Immediate type {}", value.Type());
494 }
495}
496
497Id EmitContext::BitOffset8(const IR::Value& offset) {
498 if (offset.IsImmediate()) {
499 return Const((offset.U32() % 4) * 8);
500 }
501 return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u));
502}
503
504Id EmitContext::BitOffset16(const IR::Value& offset) {
505 if (offset.IsImmediate()) {
506 return Const(((offset.U32() / 2) % 2) * 16);
507 }
508 return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u));
509}
510
511void EmitContext::DefineCommonTypes(const Info& info) {
512 void_id = TypeVoid();
513
514 U1 = Name(TypeBool(), "u1");
515
516 F32.Define(*this, TypeFloat(32), "f32");
517 U32.Define(*this, TypeInt(32, false), "u32");
518 S32.Define(*this, TypeInt(32, true), "s32");
519
520 private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
521
522 input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
523 input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
524 input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
525
526 output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
527 output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
528
529 if (info.uses_int8 && profile.support_int8) {
530 AddCapability(spv::Capability::Int8);
531 U8 = Name(TypeInt(8, false), "u8");
532 S8 = Name(TypeInt(8, true), "s8");
533 }
534 if (info.uses_int16 && profile.support_int16) {
535 AddCapability(spv::Capability::Int16);
536 U16 = Name(TypeInt(16, false), "u16");
537 S16 = Name(TypeInt(16, true), "s16");
538 }
539 if (info.uses_int64) {
540 AddCapability(spv::Capability::Int64);
541 U64 = Name(TypeInt(64, false), "u64");
542 }
543 if (info.uses_fp16) {
544 AddCapability(spv::Capability::Float16);
545 F16.Define(*this, TypeFloat(16), "f16");
546 }
547 if (info.uses_fp64) {
548 AddCapability(spv::Capability::Float64);
549 F64.Define(*this, TypeFloat(64), "f64");
550 }
551}
552
553void EmitContext::DefineCommonConstants() {
554 true_value = ConstantTrue(U1);
555 false_value = ConstantFalse(U1);
556 u32_zero_value = Const(0U);
557 f32_zero_value = Const(0.0f);
558}
559
560void EmitContext::DefineInterfaces(const IR::Program& program) {
561 DefineInputs(program);
562 DefineOutputs(program);
563}
564
565void EmitContext::DefineLocalMemory(const IR::Program& program) {
566 if (program.local_memory_size == 0) {
567 return;
568 }
569 const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
570 const Id type{TypeArray(U32[1], Const(num_elements))};
571 const Id pointer{TypePointer(spv::StorageClass::Private, type)};
572 local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
573 if (profile.supported_spirv >= 0x00010400) {
574 interfaces.push_back(local_memory);
575 }
576}
577
578void EmitContext::DefineSharedMemory(const IR::Program& program) {
579 if (program.shared_memory_size == 0) {
580 return;
581 }
582 const auto make{[&](Id element_type, u32 element_size) {
583 const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
584 const Id array_type{TypeArray(element_type, Const(num_elements))};
585 Decorate(array_type, spv::Decoration::ArrayStride, element_size);
586
587 const Id struct_type{TypeStruct(array_type)};
588 MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
589 Decorate(struct_type, spv::Decoration::Block);
590
591 const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
592 const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
593 const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
594 Decorate(variable, spv::Decoration::Aliased);
595 interfaces.push_back(variable);
596
597 return std::make_tuple(variable, element_pointer, pointer);
598 }};
599 if (profile.support_explicit_workgroup_layout) {
600 AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
601 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
602 if (program.info.uses_int8) {
603 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
604 std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
605 }
606 if (program.info.uses_int16) {
607 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
608 std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
609 }
610 if (program.info.uses_int64) {
611 std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8);
612 }
613 std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
614 std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
615 std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
616 return;
617 }
618 const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
619 const Id type{TypeArray(U32[1], Const(num_elements))};
620 shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
621
622 shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
623 shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
624 interfaces.push_back(shared_memory_u32);
625
626 const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
627 const auto make_function{[&](u32 mask, u32 size) {
628 const Id loop_header{OpLabel()};
629 const Id continue_block{OpLabel()};
630 const Id merge_block{OpLabel()};
631
632 const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
633 const Id offset{OpFunctionParameter(U32[1])};
634 const Id insert_value{OpFunctionParameter(U32[1])};
635 AddLabel();
636 OpBranch(loop_header);
637
638 AddLabel(loop_header);
639 const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
640 const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
641 const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
642 const Id count{Const(size)};
643 OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
644 OpBranch(continue_block);
645
646 AddLabel(continue_block);
647 const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
648 const Id old_value{OpLoad(U32[1], word_pointer)};
649 const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
650 const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value,
651 u32_zero_value, new_value, old_value)};
652 const Id success{OpIEqual(U1, atomic_res, old_value)};
653 OpBranchConditional(success, merge_block, loop_header);
654
655 AddLabel(merge_block);
656 OpReturn();
657 OpFunctionEnd();
658 return func;
659 }};
660 if (program.info.uses_int8) {
661 shared_store_u8_func = make_function(24, 8);
662 }
663 if (program.info.uses_int16) {
664 shared_store_u16_func = make_function(16, 16);
665 }
666}
667
668void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
669 if (program.info.uses_shared_increment) {
670 increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type,
671 shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
672 }
673 if (program.info.uses_shared_decrement) {
674 decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type,
675 shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
676 }
677}
678
679void EmitContext::DefineAttributeMemAccess(const Info& info) {
680 const auto make_load{[&] {
681 const bool is_array{stage == Stage::Geometry};
682 const Id end_block{OpLabel()};
683 const Id default_label{OpLabel()};
684
685 const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1])
686 : TypeFunction(F32[1], U32[1])};
687 const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
688 const Id offset{OpFunctionParameter(U32[1])};
689 const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}};
690
691 AddLabel();
692 const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
693 const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
694 const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
695 std::vector<Sirit::Literal> literals;
696 std::vector<Id> labels;
697 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
698 literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
699 labels.push_back(OpLabel());
700 }
701 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
702 for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
703 if (!info.loads.Generic(index)) {
704 continue;
705 }
706 literals.push_back(base_attribute_value + index);
707 labels.push_back(OpLabel());
708 }
709 OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
710 OpSwitch(compare_index, default_label, literals, labels);
711 AddLabel(default_label);
712 OpReturnValue(Const(0.0f));
713 size_t label_index{0};
714 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
715 AddLabel(labels[label_index]);
716 const Id pointer{is_array
717 ? OpAccessChain(input_f32, input_position, vertex, masked_index)
718 : OpAccessChain(input_f32, input_position, masked_index)};
719 const Id result{OpLoad(F32[1], pointer)};
720 OpReturnValue(result);
721 ++label_index;
722 }
723 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
724 if (!info.loads.Generic(index)) {
725 continue;
726 }
727 AddLabel(labels[label_index]);
728 const auto type{AttrTypes(*this, static_cast<u32>(index))};
729 if (!type) {
730 OpReturnValue(Const(0.0f));
731 ++label_index;
732 continue;
733 }
734 const Id generic_id{input_generics.at(index)};
735 const Id pointer{is_array
736 ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
737 : OpAccessChain(type->pointer, generic_id, masked_index)};
738 const Id value{OpLoad(type->id, pointer)};
739 const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
740 OpReturnValue(result);
741 ++label_index;
742 }
743 AddLabel(end_block);
744 OpUnreachable();
745 OpFunctionEnd();
746 return func;
747 }};
748 const auto make_store{[&] {
749 const Id end_block{OpLabel()};
750 const Id default_label{OpLabel()};
751
752 const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])};
753 const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)};
754 const Id offset{OpFunctionParameter(U32[1])};
755 const Id store_value{OpFunctionParameter(F32[1])};
756 AddLabel();
757 const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
758 const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
759 const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
760 std::vector<Sirit::Literal> literals;
761 std::vector<Id> labels;
762 if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
763 literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
764 labels.push_back(OpLabel());
765 }
766 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
767 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
768 if (!info.stores.Generic(index)) {
769 continue;
770 }
771 literals.push_back(base_attribute_value + static_cast<u32>(index));
772 labels.push_back(OpLabel());
773 }
774 if (info.stores.ClipDistances()) {
775 literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
776 labels.push_back(OpLabel());
777 literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
778 labels.push_back(OpLabel());
779 }
780 OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
781 OpSwitch(compare_index, default_label, literals, labels);
782 AddLabel(default_label);
783 OpReturn();
784 size_t label_index{0};
785 if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
786 AddLabel(labels[label_index]);
787 const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
788 OpStore(pointer, store_value);
789 OpReturn();
790 ++label_index;
791 }
792 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
793 if (!info.stores.Generic(index)) {
794 continue;
795 }
796 if (output_generics[index][0].num_components != 4) {
797 throw NotImplementedException("Physical stores and transform feedbacks");
798 }
799 AddLabel(labels[label_index]);
800 const Id generic_id{output_generics[index][0].id};
801 const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
802 OpStore(pointer, store_value);
803 OpReturn();
804 ++label_index;
805 }
806 if (info.stores.ClipDistances()) {
807 AddLabel(labels[label_index]);
808 const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)};
809 OpStore(pointer, store_value);
810 OpReturn();
811 ++label_index;
812 AddLabel(labels[label_index]);
813 const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))};
814 const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)};
815 OpStore(pointer2, store_value);
816 OpReturn();
817 ++label_index;
818 }
819 AddLabel(end_block);
820 OpUnreachable();
821 OpFunctionEnd();
822 return func;
823 }};
824 if (info.loads_indexed_attributes) {
825 indexed_load_func = make_load();
826 }
827 if (info.stores_indexed_attributes) {
828 indexed_store_func = make_store();
829 }
830}
831
832void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
833 if (!info.uses_global_memory || !profile.support_int64) {
834 return;
835 }
836 using DefPtr = Id StorageDefinitions::*;
837 const Id zero{u32_zero_value};
838 const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
839 auto&& callback) {
840 AddLabel();
841 const size_t num_buffers{info.storage_buffers_descriptors.size()};
842 for (size_t index = 0; index < num_buffers; ++index) {
843 if (!info.nvn_buffer_used[index]) {
844 continue;
845 }
846 const auto& ssbo{info.storage_buffers_descriptors[index]};
847 const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
848 const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
849 const Id ssbo_addr_pointer{OpAccessChain(
850 uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)};
851 const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
852 zero, ssbo_size_cbuf_offset)};
853
854 const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
855 const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
856 const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
857 const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
858 OpULessThan(U1, addr, ssbo_end))};
859 const Id then_label{OpLabel()};
860 const Id else_label{OpLabel()};
861 OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
862 OpBranchConditional(cond, then_label, else_label);
863 AddLabel(then_label);
864 const Id ssbo_id{ssbos[index].*ssbo_member};
865 const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
866 const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
867 const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
868 callback(ssbo_pointer);
869 AddLabel(else_label);
870 }
871 }};
872 const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
873 const Id function_type{TypeFunction(type, U64)};
874 const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
875 const Id addr{OpFunctionParameter(U64)};
876 define_body(ssbo_member, addr, element_pointer, shift,
877 [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
878 OpReturnValue(ConstantNull(type));
879 OpFunctionEnd();
880 return func_id;
881 }};
882 const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
883 const Id function_type{TypeFunction(void_id, U64, type)};
884 const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
885 const Id addr{OpFunctionParameter(U64)};
886 const Id data{OpFunctionParameter(type)};
887 define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
888 OpStore(ssbo_pointer, data);
889 OpReturn();
890 });
891 OpReturn();
892 OpFunctionEnd();
893 return func_id;
894 }};
895 const auto define{
896 [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
897 const Id element_type{type_def.element};
898 const u32 shift{static_cast<u32>(std::countr_zero(size))};
899 const Id load_func{define_load(ssbo_member, element_type, type, shift)};
900 const Id write_func{define_write(ssbo_member, element_type, type, shift)};
901 return std::make_pair(load_func, write_func);
902 }};
903 std::tie(load_global_func_u32, write_global_func_u32) =
904 define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
905 std::tie(load_global_func_u32x2, write_global_func_u32x2) =
906 define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
907 std::tie(load_global_func_u32x4, write_global_func_u32x4) =
908 define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
909}
910
911void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
912 if (info.constant_buffer_descriptors.empty()) {
913 return;
914 }
915 if (!profile.support_descriptor_aliasing) {
916 DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u',
917 sizeof(u32[4]));
918 for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
919 binding += desc.count;
920 }
921 return;
922 }
923 IR::Type types{info.used_constant_buffer_types};
924 if (True(types & IR::Type::U8)) {
925 if (profile.support_int8) {
926 DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
927 DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8));
928 } else {
929 types |= IR::Type::U32;
930 }
931 }
932 if (True(types & IR::Type::U16)) {
933 if (profile.support_int16) {
934 DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u',
935 sizeof(u16));
936 DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's',
937 sizeof(s16));
938 } else {
939 types |= IR::Type::U32;
940 }
941 }
942 if (True(types & IR::Type::U32)) {
943 DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u',
944 sizeof(u32));
945 }
946 if (True(types & IR::Type::F32)) {
947 DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f',
948 sizeof(f32));
949 }
950 if (True(types & IR::Type::U32x2)) {
951 DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u',
952 sizeof(u32[2]));
953 }
954 binding += static_cast<u32>(info.constant_buffer_descriptors.size());
955}
956
957void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
958 if (info.storage_buffers_descriptors.empty()) {
959 return;
960 }
961 AddExtension("SPV_KHR_storage_buffer_storage_class");
962
963 const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types
964 : IR::Type::U32};
965 if (profile.support_int8 && True(used_types & IR::Type::U8)) {
966 DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8,
967 sizeof(u8));
968 DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8,
969 sizeof(u8));
970 }
971 if (profile.support_int16 && True(used_types & IR::Type::U16)) {
972 DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16,
973 sizeof(u16));
974 DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16,
975 sizeof(u16));
976 }
977 if (True(used_types & IR::Type::U32)) {
978 DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1],
979 sizeof(u32));
980 }
981 if (True(used_types & IR::Type::F32)) {
982 DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1],
983 sizeof(f32));
984 }
985 if (True(used_types & IR::Type::U64)) {
986 DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64,
987 sizeof(u64));
988 }
989 if (True(used_types & IR::Type::U32x2)) {
990 DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2],
991 sizeof(u32[2]));
992 }
993 if (True(used_types & IR::Type::U32x4)) {
994 DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4],
995 sizeof(u32[4]));
996 }
997 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
998 binding += desc.count;
999 }
1000 const bool needs_function{
1001 info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add ||
1002 info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max ||
1003 info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max};
1004 if (needs_function) {
1005 AddCapability(spv::Capability::VariablePointersStorageBuffer);
1006 }
1007 if (info.uses_global_increment) {
1008 increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array,
1009 storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
1010 }
1011 if (info.uses_global_decrement) {
1012 decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array,
1013 storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
1014 }
1015 if (info.uses_atomic_f32_add) {
1016 f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
1017 storage_types.U32.element, F32[1], U32[1], spv::Scope::Device);
1018 }
1019 if (info.uses_atomic_f16x2_add) {
1020 f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
1021 storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
1022 }
1023 if (info.uses_atomic_f16x2_min) {
1024 f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
1025 storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
1026 }
1027 if (info.uses_atomic_f16x2_max) {
1028 f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
1029 storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
1030 }
1031 if (info.uses_atomic_f32x2_add) {
1032 f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
1033 storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
1034 }
1035 if (info.uses_atomic_f32x2_min) {
1036 f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
1037 storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
1038 }
1039 if (info.uses_atomic_f32x2_max) {
1040 f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
1041 storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
1042 }
1043}
1044
1045void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
1046 if (info.texture_buffer_descriptors.empty()) {
1047 return;
1048 }
1049 const spv::ImageFormat format{spv::ImageFormat::Unknown};
1050 image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
1051 sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
1052
1053 const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)};
1054 texture_buffers.reserve(info.texture_buffer_descriptors.size());
1055 for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
1056 if (desc.count != 1) {
1057 throw NotImplementedException("Array of texture buffers");
1058 }
1059 const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)};
1060 Decorate(id, spv::Decoration::Binding, binding);
1061 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1062 Name(id, NameOf(stage, desc, "texbuf"));
1063 texture_buffers.push_back({
1064 .id = id,
1065 .count = desc.count,
1066 });
1067 if (profile.supported_spirv >= 0x00010400) {
1068 interfaces.push_back(id);
1069 }
1070 ++binding;
1071 }
1072}
1073
1074void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
1075 image_buffers.reserve(info.image_buffer_descriptors.size());
1076 for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) {
1077 if (desc.count != 1) {
1078 throw NotImplementedException("Array of image buffers");
1079 }
1080 const spv::ImageFormat format{GetImageFormat(desc.format)};
1081 const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
1082 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
1083 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
1084 Decorate(id, spv::Decoration::Binding, binding);
1085 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1086 Name(id, NameOf(stage, desc, "imgbuf"));
1087 image_buffers.push_back({
1088 .id = id,
1089 .image_type = image_type,
1090 .count = desc.count,
1091 });
1092 if (profile.supported_spirv >= 0x00010400) {
1093 interfaces.push_back(id);
1094 }
1095 ++binding;
1096 }
1097}
1098
1099void EmitContext::DefineTextures(const Info& info, u32& binding) {
1100 textures.reserve(info.texture_descriptors.size());
1101 for (const TextureDescriptor& desc : info.texture_descriptors) {
1102 const Id image_type{ImageType(*this, desc)};
1103 const Id sampled_type{TypeSampledImage(image_type)};
1104 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
1105 const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
1106 const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)};
1107 Decorate(id, spv::Decoration::Binding, binding);
1108 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1109 Name(id, NameOf(stage, desc, "tex"));
1110 textures.push_back({
1111 .id = id,
1112 .sampled_type = sampled_type,
1113 .pointer_type = pointer_type,
1114 .image_type = image_type,
1115 .count = desc.count,
1116 });
1117 if (profile.supported_spirv >= 0x00010400) {
1118 interfaces.push_back(id);
1119 }
1120 ++binding;
1121 }
1122 if (info.uses_atomic_image_u32) {
1123 image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
1124 }
1125}
1126
1127void EmitContext::DefineImages(const Info& info, u32& binding) {
1128 images.reserve(info.image_descriptors.size());
1129 for (const ImageDescriptor& desc : info.image_descriptors) {
1130 if (desc.count != 1) {
1131 throw NotImplementedException("Array of images");
1132 }
1133 const Id image_type{ImageType(*this, desc)};
1134 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
1135 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
1136 Decorate(id, spv::Decoration::Binding, binding);
1137 Decorate(id, spv::Decoration::DescriptorSet, 0U);
1138 Name(id, NameOf(stage, desc, "img"));
1139 images.push_back({
1140 .id = id,
1141 .image_type = image_type,
1142 .count = desc.count,
1143 });
1144 if (profile.supported_spirv >= 0x00010400) {
1145 interfaces.push_back(id);
1146 }
1147 ++binding;
1148 }
1149}
1150
1151void EmitContext::DefineInputs(const IR::Program& program) {
1152 const Info& info{program.info};
1153 const VaryingState loads{info.loads.mask | info.passthrough.mask};
1154
1155 if (info.uses_workgroup_id) {
1156 workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
1157 }
1158 if (info.uses_local_invocation_id) {
1159 local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId);
1160 }
1161 if (info.uses_invocation_id) {
1162 invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId);
1163 }
1164 if (info.uses_sample_id) {
1165 sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
1166 }
1167 if (info.uses_is_helper_invocation) {
1168 is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
1169 }
1170 if (info.uses_subgroup_mask) {
1171 subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
1172 subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
1173 subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
1174 subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
1175 subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
1176 }
1177 if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
1178 (profile.warp_size_potentially_larger_than_guest &&
1179 (info.uses_subgroup_vote || info.uses_subgroup_mask))) {
1180 subgroup_local_invocation_id =
1181 DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
1182 }
1183 if (info.uses_fswzadd) {
1184 const Id f32_one{Const(1.0f)};
1185 const Id f32_minus_one{Const(-1.0f)};
1186 const Id f32_zero{Const(0.0f)};
1187 fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
1188 fswzadd_lut_b =
1189 ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
1190 }
1191 if (loads[IR::Attribute::PrimitiveId]) {
1192 primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
1193 }
1194 if (loads.AnyComponent(IR::Attribute::PositionX)) {
1195 const bool is_fragment{stage == Stage::Fragment};
1196 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
1197 input_position = DefineInput(*this, F32[4], true, built_in);
1198 if (profile.support_geometry_shader_passthrough) {
1199 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
1200 Decorate(input_position, spv::Decoration::PassthroughNV);
1201 }
1202 }
1203 }
1204 if (loads[IR::Attribute::InstanceId]) {
1205 if (profile.support_vertex_instance_id) {
1206 instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
1207 } else {
1208 instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
1209 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
1210 }
1211 }
1212 if (loads[IR::Attribute::VertexId]) {
1213 if (profile.support_vertex_instance_id) {
1214 vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
1215 } else {
1216 vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
1217 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
1218 }
1219 }
1220 if (loads[IR::Attribute::FrontFace]) {
1221 front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
1222 }
1223 if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) {
1224 point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord);
1225 }
1226 if (loads[IR::Attribute::TessellationEvaluationPointU] ||
1227 loads[IR::Attribute::TessellationEvaluationPointV]) {
1228 tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
1229 }
1230 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
1231 const AttributeType input_type{runtime_info.generic_input_types[index]};
1232 if (!runtime_info.previous_stage_stores.Generic(index)) {
1233 continue;
1234 }
1235 if (!loads.Generic(index)) {
1236 continue;
1237 }
1238 if (input_type == AttributeType::Disabled) {
1239 continue;
1240 }
1241 const Id type{GetAttributeType(*this, input_type)};
1242 const Id id{DefineInput(*this, type, true)};
1243 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
1244 Name(id, fmt::format("in_attr{}", index));
1245 input_generics[index] = id;
1246
1247 if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) {
1248 Decorate(id, spv::Decoration::PassthroughNV);
1249 }
1250 if (stage != Stage::Fragment) {
1251 continue;
1252 }
1253 switch (info.interpolation[index]) {
1254 case Interpolation::Smooth:
1255 // Smooth interpolation is the default, so no decoration is emitted
1256 // (equivalent to Decorate(id, spv::Decoration::Smooth))
1257 break;
1258 case Interpolation::NoPerspective:
1259 Decorate(id, spv::Decoration::NoPerspective);
1260 break;
1261 case Interpolation::Flat:
1262 Decorate(id, spv::Decoration::Flat);
1263 break;
1264 }
1265 }
1266 if (stage == Stage::TessellationEval) {
1267 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
1268 if (!info.uses_patches[index]) {
1269 continue;
1270 }
1271 const Id id{DefineInput(*this, F32[4], false)};
1272 Decorate(id, spv::Decoration::Patch);
1273 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
1274 patches[index] = id;
1275 }
1276 }
1277}
1278
1279void EmitContext::DefineOutputs(const IR::Program& program) {
1280 const Info& info{program.info};
1281 const std::optional<u32> invocations{program.invocations};
1282 if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) {
1283 output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position);
1284 }
1285 if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) {
1286 if (stage == Stage::Fragment) {
1287 throw NotImplementedException("Storing PointSize in fragment stage");
1288 }
1289 output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize);
1290 }
1291 if (info.stores.ClipDistances()) {
1292 if (stage == Stage::Fragment) {
1293 throw NotImplementedException("Storing ClipDistance in fragment stage");
1294 }
1295 const Id type{TypeArray(F32[1], Const(8U))};
1296 clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
1297 }
1298 if (info.stores[IR::Attribute::Layer] &&
1299 (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
1300 if (stage == Stage::Fragment) {
1301 throw NotImplementedException("Storing Layer in fragment stage");
1302 }
1303 layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer);
1304 }
1305 if (info.stores[IR::Attribute::ViewportIndex] &&
1306 (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
1307 if (stage == Stage::Fragment) {
1308 throw NotImplementedException("Storing ViewportIndex in fragment stage");
1309 }
1310 viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex);
1311 }
1312 if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
1313 viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
1314 spv::BuiltIn::ViewportMaskNV);
1315 }
1316 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
1317 if (info.stores.Generic(index)) {
1318 DefineGenericOutput(*this, index, invocations);
1319 }
1320 }
1321 switch (stage) {
1322 case Stage::TessellationControl:
1323 if (info.stores_tess_level_outer) {
1324 const Id type{TypeArray(F32[1], Const(4U))};
1325 output_tess_level_outer =
1326 DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter);
1327 Decorate(output_tess_level_outer, spv::Decoration::Patch);
1328 }
1329 if (info.stores_tess_level_inner) {
1330 const Id type{TypeArray(F32[1], Const(2U))};
1331 output_tess_level_inner =
1332 DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner);
1333 Decorate(output_tess_level_inner, spv::Decoration::Patch);
1334 }
1335 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
1336 if (!info.uses_patches[index]) {
1337 continue;
1338 }
1339 const Id id{DefineOutput(*this, F32[4], std::nullopt)};
1340 Decorate(id, spv::Decoration::Patch);
1341 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
1342 patches[index] = id;
1343 }
1344 break;
1345 case Stage::Fragment:
1346 for (u32 index = 0; index < 8; ++index) {
1347 if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
1348 continue;
1349 }
1350 frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
1351 Decorate(frag_color[index], spv::Decoration::Location, index);
1352 Name(frag_color[index], fmt::format("frag_color{}", index));
1353 }
1354 if (info.stores_frag_depth) {
1355 frag_depth = DefineOutput(*this, F32[1], std::nullopt);
1356 Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
1357 }
1358 if (info.stores_sample_mask) {
1359 sample_mask = DefineOutput(*this, U32[1], std::nullopt);
1360 Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
1361 }
1362 break;
1363 default:
1364 break;
1365 }
1366}
1367
1368} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
new file mode 100644
index 000000000..e277bc358
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -0,0 +1,307 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string_view>
9
10#include <sirit/sirit.h>
11
12#include "shader_recompiler/backend/bindings.h"
13#include "shader_recompiler/frontend/ir/program.h"
14#include "shader_recompiler/profile.h"
15#include "shader_recompiler/runtime_info.h"
16#include "shader_recompiler/shader_info.h"
17
18namespace Shader::Backend::SPIRV {
19
20using Sirit::Id;
21
22class VectorTypes {
23public:
24 void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name);
25
26 [[nodiscard]] Id operator[](size_t size) const noexcept {
27 return defs[size - 1];
28 }
29
30private:
31 std::array<Id, 4> defs{};
32};
33
34struct TextureDefinition {
35 Id id;
36 Id sampled_type;
37 Id pointer_type;
38 Id image_type;
39 u32 count;
40};
41
42struct TextureBufferDefinition {
43 Id id;
44 u32 count;
45};
46
47struct ImageBufferDefinition {
48 Id id;
49 Id image_type;
50 u32 count;
51};
52
53struct ImageDefinition {
54 Id id;
55 Id image_type;
56 u32 count;
57};
58
59struct UniformDefinitions {
60 Id U8{};
61 Id S8{};
62 Id U16{};
63 Id S16{};
64 Id U32{};
65 Id F32{};
66 Id U32x2{};
67 Id U32x4{};
68};
69
70struct StorageTypeDefinition {
71 Id array{};
72 Id element{};
73};
74
75struct StorageTypeDefinitions {
76 StorageTypeDefinition U8{};
77 StorageTypeDefinition S8{};
78 StorageTypeDefinition U16{};
79 StorageTypeDefinition S16{};
80 StorageTypeDefinition U32{};
81 StorageTypeDefinition U64{};
82 StorageTypeDefinition F32{};
83 StorageTypeDefinition U32x2{};
84 StorageTypeDefinition U32x4{};
85};
86
87struct StorageDefinitions {
88 Id U8{};
89 Id S8{};
90 Id U16{};
91 Id S16{};
92 Id U32{};
93 Id F32{};
94 Id U64{};
95 Id U32x2{};
96 Id U32x4{};
97};
98
99struct GenericElementInfo {
100 Id id{};
101 u32 first_element{};
102 u32 num_components{};
103};
104
105class EmitContext final : public Sirit::Module {
106public:
107 explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info,
108 IR::Program& program, Bindings& binding);
109 ~EmitContext();
110
111 [[nodiscard]] Id Def(const IR::Value& value);
112
113 [[nodiscard]] Id BitOffset8(const IR::Value& offset);
114 [[nodiscard]] Id BitOffset16(const IR::Value& offset);
115
116 Id Const(u32 value) {
117 return Constant(U32[1], value);
118 }
119
120 Id Const(u32 element_1, u32 element_2) {
121 return ConstantComposite(U32[2], Const(element_1), Const(element_2));
122 }
123
124 Id Const(u32 element_1, u32 element_2, u32 element_3) {
125 return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3));
126 }
127
128 Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) {
129 return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3),
130 Const(element_4));
131 }
132
133 Id SConst(s32 value) {
134 return Constant(S32[1], value);
135 }
136
137 Id SConst(s32 element_1, s32 element_2) {
138 return ConstantComposite(S32[2], SConst(element_1), SConst(element_2));
139 }
140
141 Id SConst(s32 element_1, s32 element_2, s32 element_3) {
142 return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3));
143 }
144
145 Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) {
146 return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3),
147 SConst(element_4));
148 }
149
150 Id Const(f32 value) {
151 return Constant(F32[1], value);
152 }
153
154 const Profile& profile;
155 const RuntimeInfo& runtime_info;
156 Stage stage{};
157
158 Id void_id{};
159 Id U1{};
160 Id U8{};
161 Id S8{};
162 Id U16{};
163 Id S16{};
164 Id U64{};
165 VectorTypes F32;
166 VectorTypes U32;
167 VectorTypes S32;
168 VectorTypes F16;
169 VectorTypes F64;
170
171 Id true_value{};
172 Id false_value{};
173 Id u32_zero_value{};
174 Id f32_zero_value{};
175
176 UniformDefinitions uniform_types;
177 StorageTypeDefinitions storage_types;
178
179 Id private_u32{};
180
181 Id shared_u8{};
182 Id shared_u16{};
183 Id shared_u32{};
184 Id shared_u64{};
185 Id shared_u32x2{};
186 Id shared_u32x4{};
187
188 Id input_f32{};
189 Id input_u32{};
190 Id input_s32{};
191
192 Id output_f32{};
193 Id output_u32{};
194
195 Id image_buffer_type{};
196 Id sampled_texture_buffer_type{};
197 Id image_u32{};
198
199 std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
200 std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{};
201 std::vector<TextureBufferDefinition> texture_buffers;
202 std::vector<ImageBufferDefinition> image_buffers;
203 std::vector<TextureDefinition> textures;
204 std::vector<ImageDefinition> images;
205
206 Id workgroup_id{};
207 Id local_invocation_id{};
208 Id invocation_id{};
209 Id sample_id{};
210 Id is_helper_invocation{};
211 Id subgroup_local_invocation_id{};
212 Id subgroup_mask_eq{};
213 Id subgroup_mask_lt{};
214 Id subgroup_mask_le{};
215 Id subgroup_mask_gt{};
216 Id subgroup_mask_ge{};
217 Id instance_id{};
218 Id instance_index{};
219 Id base_instance{};
220 Id vertex_id{};
221 Id vertex_index{};
222 Id base_vertex{};
223 Id front_face{};
224 Id point_coord{};
225 Id tess_coord{};
226 Id clip_distances{};
227 Id layer{};
228 Id viewport_index{};
229 Id viewport_mask{};
230 Id primitive_id{};
231
232 Id fswzadd_lut_a{};
233 Id fswzadd_lut_b{};
234
235 Id indexed_load_func{};
236 Id indexed_store_func{};
237
238 Id local_memory{};
239
240 Id shared_memory_u8{};
241 Id shared_memory_u16{};
242 Id shared_memory_u32{};
243 Id shared_memory_u64{};
244 Id shared_memory_u32x2{};
245 Id shared_memory_u32x4{};
246
247 Id shared_memory_u32_type{};
248
249 Id shared_store_u8_func{};
250 Id shared_store_u16_func{};
251 Id increment_cas_shared{};
252 Id increment_cas_ssbo{};
253 Id decrement_cas_shared{};
254 Id decrement_cas_ssbo{};
255 Id f32_add_cas{};
256 Id f16x2_add_cas{};
257 Id f16x2_min_cas{};
258 Id f16x2_max_cas{};
259 Id f32x2_add_cas{};
260 Id f32x2_min_cas{};
261 Id f32x2_max_cas{};
262
263 Id load_global_func_u32{};
264 Id load_global_func_u32x2{};
265 Id load_global_func_u32x4{};
266 Id write_global_func_u32{};
267 Id write_global_func_u32x2{};
268 Id write_global_func_u32x4{};
269
270 Id input_position{};
271 std::array<Id, 32> input_generics{};
272
273 Id output_point_size{};
274 Id output_position{};
275 std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
276
277 Id output_tess_level_outer{};
278 Id output_tess_level_inner{};
279 std::array<Id, 30> patches{};
280
281 std::array<Id, 8> frag_color{};
282 Id sample_mask{};
283 Id frag_depth{};
284
285 std::vector<Id> interfaces;
286
287private:
288 void DefineCommonTypes(const Info& info);
289 void DefineCommonConstants();
290 void DefineInterfaces(const IR::Program& program);
291 void DefineLocalMemory(const IR::Program& program);
292 void DefineSharedMemory(const IR::Program& program);
293 void DefineSharedMemoryFunctions(const IR::Program& program);
294 void DefineConstantBuffers(const Info& info, u32& binding);
295 void DefineStorageBuffers(const Info& info, u32& binding);
296 void DefineTextureBuffers(const Info& info, u32& binding);
297 void DefineImageBuffers(const Info& info, u32& binding);
298 void DefineTextures(const Info& info, u32& binding);
299 void DefineImages(const Info& info, u32& binding);
300 void DefineAttributeMemAccess(const Info& info);
301 void DefineGlobalMemoryFunctions(const Info& info);
302
303 void DefineInputs(const IR::Program& program);
304 void DefineOutputs(const IR::Program& program);
305};
306
307} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
new file mode 100644
index 000000000..d7a86e270
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -0,0 +1,541 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <span>
6#include <tuple>
7#include <type_traits>
8#include <utility>
9#include <vector>
10
11#include "common/settings.h"
12#include "shader_recompiler/backend/spirv/emit_spirv.h"
13#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
14#include "shader_recompiler/frontend/ir/basic_block.h"
15#include "shader_recompiler/frontend/ir/program.h"
16
17namespace Shader::Backend::SPIRV {
18namespace {
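// Dispatch helpers: FuncTraits introspects an Emit* function's signature, Arg() converts each
// IR::Value operand into the parameter type that signature expects (Id, raw value, immediate
// u32, attribute, patch or register), and Invoke() forwards the decoded arguments, storing the
// returned Id on the instruction when the emitter produces one.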
19template <class Func>
20struct FuncTraits {};
21
22template <class ReturnType_, class... Args>
23struct FuncTraits<ReturnType_ (*)(Args...)> {
24 using ReturnType = ReturnType_;
25
26 static constexpr size_t NUM_ARGS = sizeof...(Args);
27
28 template <size_t I>
29 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
30};
31
32template <auto func, typename... Args>
33void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
34 inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
35}
36
37template <typename ArgType>
38ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
39 if constexpr (std::is_same_v<ArgType, Id>) {
40 return ctx.Def(arg);
41 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
42 return arg;
43 } else if constexpr (std::is_same_v<ArgType, u32>) {
44 return arg.U32();
45 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
46 return arg.Attribute();
47 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
48 return arg.Patch();
49 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
50 return arg.Reg();
51 }
52}
53
54template <auto func, bool is_first_arg_inst, size_t... I>
55void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
56 using Traits = FuncTraits<decltype(func)>;
57 if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
58 if constexpr (is_first_arg_inst) {
59 SetDefinition<func>(
60 ctx, inst, inst,
61 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
62 } else {
63 SetDefinition<func>(
64 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
65 }
66 } else {
67 if constexpr (is_first_arg_inst) {
68 func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
69 } else {
70 func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
71 }
72 }
73}
74
75template <auto func>
76void Invoke(EmitContext& ctx, IR::Inst* inst) {
77 using Traits = FuncTraits<decltype(func)>;
78 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
79 if constexpr (Traits::NUM_ARGS == 1) {
80 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
81 } else {
82 using FirstArgType = typename Traits::template ArgType<1>;
83 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
84 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
85 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
86 }
87}
88
89void EmitInst(EmitContext& ctx, IR::Inst* inst) {
90 switch (inst->GetOpcode()) {
91#define OPCODE(name, result_type, ...) \
92 case IR::Opcode::name: \
93 return Invoke<&Emit##name>(ctx, inst);
94#include "shader_recompiler/frontend/ir/opcodes.inc"
95#undef OPCODE
96 }
97 throw LogicError("Invalid opcode {}", inst->GetOpcode());
98}
99
100Id TypeId(const EmitContext& ctx, IR::Type type) {
101 switch (type) {
102 case IR::Type::U1:
103 return ctx.U1;
104 case IR::Type::U32:
105 return ctx.U32[1];
106 default:
107 throw NotImplementedException("Phi node type {}", type);
108 }
109}
110
111void Traverse(EmitContext& ctx, IR::Program& program) {
112 IR::Block* current_block{};
113 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
114 switch (node.type) {
115 case IR::AbstractSyntaxNode::Type::Block: {
116 const Id label{node.data.block->Definition<Id>()};
117 if (current_block) {
118 ctx.OpBranch(label);
119 }
120 current_block = node.data.block;
121 ctx.AddLabel(label);
122 for (IR::Inst& inst : node.data.block->Instructions()) {
123 EmitInst(ctx, &inst);
124 }
125 break;
126 }
127 case IR::AbstractSyntaxNode::Type::If: {
128 const Id if_label{node.data.if_node.body->Definition<Id>()};
129 const Id endif_label{node.data.if_node.merge->Definition<Id>()};
130 ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
131 ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label);
132 break;
133 }
134 case IR::AbstractSyntaxNode::Type::Loop: {
135 const Id body_label{node.data.loop.body->Definition<Id>()};
136 const Id continue_label{node.data.loop.continue_block->Definition<Id>()};
137 const Id endloop_label{node.data.loop.merge->Definition<Id>()};
138
139 ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
140 ctx.OpBranch(body_label);
141 break;
142 }
143 case IR::AbstractSyntaxNode::Type::Break: {
144 const Id break_label{node.data.break_node.merge->Definition<Id>()};
145 const Id skip_label{node.data.break_node.skip->Definition<Id>()};
146 ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label);
147 break;
148 }
149 case IR::AbstractSyntaxNode::Type::EndIf:
150 if (current_block) {
151 ctx.OpBranch(node.data.end_if.merge->Definition<Id>());
152 }
153 break;
154 case IR::AbstractSyntaxNode::Type::Repeat: {
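            // Backward edges are optionally guarded by a per-loop iteration budget (0x2000) so
            // malformed control flow cannot hang the host GPU; the loop exits once the private
            // counter drops below zero.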
155 Id cond{ctx.Def(node.data.repeat.cond)};
156 if (!Settings::values.disable_shader_loop_safety_checks) {
157 const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])};
158 const Id safety_counter{ctx.AddGlobalVariable(
159 pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))};
160 if (ctx.profile.supported_spirv >= 0x00010400) {
161 ctx.interfaces.push_back(safety_counter);
162 }
163 const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)};
164 const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))};
165 ctx.OpStore(safety_counter, new_counter);
166
167 const Id safety_cond{
168 ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)};
169 cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond);
170 }
171 const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()};
172 const Id merge_label{node.data.repeat.merge->Definition<Id>()};
173 ctx.OpBranchConditional(cond, loop_header_label, merge_label);
174 break;
175 }
176 case IR::AbstractSyntaxNode::Type::Return:
177 ctx.OpReturn();
178 break;
179 case IR::AbstractSyntaxNode::Type::Unreachable:
180 ctx.OpUnreachable();
181 break;
182 }
183 if (node.type != IR::AbstractSyntaxNode::Type::Block) {
184 current_block = nullptr;
185 }
186 }
187}
188
189Id DefineMain(EmitContext& ctx, IR::Program& program) {
190 const Id void_function{ctx.TypeFunction(ctx.void_id)};
191 const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
192 for (IR::Block* const block : program.blocks) {
193 block->SetDefinition(ctx.OpLabel());
194 }
195 Traverse(ctx, program);
196 ctx.OpFunctionEnd();
197 return main;
198}
199
200spv::ExecutionMode ExecutionMode(TessPrimitive primitive) {
201 switch (primitive) {
202 case TessPrimitive::Isolines:
203 return spv::ExecutionMode::Isolines;
204 case TessPrimitive::Triangles:
205 return spv::ExecutionMode::Triangles;
206 case TessPrimitive::Quads:
207 return spv::ExecutionMode::Quads;
208 }
209 throw InvalidArgument("Tessellation primitive {}", primitive);
210}
211
212spv::ExecutionMode ExecutionMode(TessSpacing spacing) {
213 switch (spacing) {
214 case TessSpacing::Equal:
215 return spv::ExecutionMode::SpacingEqual;
216 case TessSpacing::FractionalOdd:
217 return spv::ExecutionMode::SpacingFractionalOdd;
218 case TessSpacing::FractionalEven:
219 return spv::ExecutionMode::SpacingFractionalEven;
220 }
221 throw InvalidArgument("Tessellation spacing {}", spacing);
222}
223
224void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
225 const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
226 spv::ExecutionModel execution_model{};
227 switch (program.stage) {
228 case Stage::Compute: {
229 const std::array<u32, 3> workgroup_size{program.workgroup_size};
230 execution_model = spv::ExecutionModel::GLCompute;
231 ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
232 workgroup_size[1], workgroup_size[2]);
233 break;
234 }
235 case Stage::VertexB:
236 execution_model = spv::ExecutionModel::Vertex;
237 break;
238 case Stage::TessellationControl:
239 execution_model = spv::ExecutionModel::TessellationControl;
240 ctx.AddCapability(spv::Capability::Tessellation);
241 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations);
242 break;
243 case Stage::TessellationEval:
244 execution_model = spv::ExecutionModel::TessellationEvaluation;
245 ctx.AddCapability(spv::Capability::Tessellation);
246 ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive));
247 ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing));
248 ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise
249 ? spv::ExecutionMode::VertexOrderCw
250 : spv::ExecutionMode::VertexOrderCcw);
251 break;
252 case Stage::Geometry:
253 execution_model = spv::ExecutionModel::Geometry;
254 ctx.AddCapability(spv::Capability::Geometry);
255 ctx.AddCapability(spv::Capability::GeometryStreams);
256 switch (ctx.runtime_info.input_topology) {
257 case InputTopology::Points:
258 ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints);
259 break;
260 case InputTopology::Lines:
261 ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines);
262 break;
263 case InputTopology::LinesAdjacency:
264 ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency);
265 break;
266 case InputTopology::Triangles:
267 ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles);
268 break;
269 case InputTopology::TrianglesAdjacency:
270 ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency);
271 break;
272 }
273 switch (program.output_topology) {
274 case OutputTopology::PointList:
275 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints);
276 break;
277 case OutputTopology::LineStrip:
278 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip);
279 break;
280 case OutputTopology::TriangleStrip:
281 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip);
282 break;
283 }
284 if (program.info.stores[IR::Attribute::PointSize]) {
285 ctx.AddCapability(spv::Capability::GeometryPointSize);
286 }
287 ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices);
288 ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations);
289 if (program.is_geometry_passthrough) {
290 if (ctx.profile.support_geometry_shader_passthrough) {
291 ctx.AddExtension("SPV_NV_geometry_shader_passthrough");
292 ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV);
293 } else {
294 LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support");
295 }
296 }
297 break;
298 case Stage::Fragment:
299 execution_model = spv::ExecutionModel::Fragment;
300 if (ctx.profile.lower_left_origin_mode) {
301 ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
302 } else {
303 ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
304 }
305 if (program.info.stores_frag_depth) {
306 ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
307 }
308 if (ctx.runtime_info.force_early_z) {
309 ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
310 }
311 break;
312 default:
313 throw NotImplementedException("Stage {}", program.stage);
314 }
315 ctx.AddEntryPoint(execution_model, main, "main", interfaces);
316}
317
318void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
319 Id main_func) {
320 const Info& info{program.info};
321 if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
322 LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
323 } else if (info.uses_fp32_denorms_flush) {
324 if (profile.support_fp32_denorm_flush) {
325 ctx.AddCapability(spv::Capability::DenormFlushToZero);
326 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
327 } else {
328 // Drivers will most likely flush denorms by default, no need to warn
329 }
330 } else if (info.uses_fp32_denorms_preserve) {
331 if (profile.support_fp32_denorm_preserve) {
332 ctx.AddCapability(spv::Capability::DenormPreserve);
333 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
334 } else {
335 LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
336 }
337 }
338 if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
339 // No separate denorm behavior
340 return;
341 }
342 if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
343 LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
344 } else if (info.uses_fp16_denorms_flush) {
345 if (profile.support_fp16_denorm_flush) {
346 ctx.AddCapability(spv::Capability::DenormFlushToZero);
347 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
348 } else {
349 // Same as fp32, no need to warn as most drivers will flush by default
350 }
351 } else if (info.uses_fp16_denorms_preserve) {
352 if (profile.support_fp16_denorm_preserve) {
353 ctx.AddCapability(spv::Capability::DenormPreserve);
354 ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
355 } else {
356 LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
357 }
358 }
359}
360
361void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
362 EmitContext& ctx, Id main_func) {
363 if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
364 return;
365 }
366 if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
367 ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
368 ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
369 }
370 if (profile.support_fp32_signed_zero_nan_preserve) {
371 ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
372 ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
373 }
374 if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
375 ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
376 ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U);
377 }
378}
379
380void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
381 if (info.uses_sampled_1d) {
382 ctx.AddCapability(spv::Capability::Sampled1D);
383 }
384 if (info.uses_sparse_residency) {
385 ctx.AddCapability(spv::Capability::SparseResidency);
386 }
387 if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) {
388 ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
389 ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
390 }
391 if (info.stores[IR::Attribute::ViewportIndex]) {
392 ctx.AddCapability(spv::Capability::MultiViewport);
393 }
394 if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
395 ctx.AddExtension("SPV_NV_viewport_array2");
396 ctx.AddCapability(spv::Capability::ShaderViewportMaskNV);
397 }
398 if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) {
399 if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
400 ctx.AddExtension("SPV_EXT_shader_viewport_index_layer");
401 ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
402 }
403 }
404 if (!profile.support_vertex_instance_id &&
405 (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) {
406 ctx.AddExtension("SPV_KHR_shader_draw_parameters");
407 ctx.AddCapability(spv::Capability::DrawParameters);
408 }
409 if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
410 info.uses_subgroup_shuffles) &&
411 profile.support_vote) {
412 ctx.AddExtension("SPV_KHR_shader_ballot");
413 ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
414 if (!profile.warp_size_potentially_larger_than_guest) {
415 // vote ops are only used when not taking the long path
416 ctx.AddExtension("SPV_KHR_subgroup_vote");
417 ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
418 }
419 }
420 if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
421 ctx.AddCapability(spv::Capability::Int64Atomics);
422 }
423 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
424 ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
425 }
426 if (info.uses_typeless_image_writes) {
427 ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
428 }
429 if (info.uses_image_buffers) {
430 ctx.AddCapability(spv::Capability::ImageBuffer);
431 }
432 if (info.uses_sample_id) {
433 ctx.AddCapability(spv::Capability::SampleRateShading);
434 }
435 if (!ctx.runtime_info.xfb_varyings.empty()) {
436 ctx.AddCapability(spv::Capability::TransformFeedback);
437 }
438 if (info.uses_derivatives) {
439 ctx.AddCapability(spv::Capability::DerivativeControl);
440 }
441 // TODO: Track this usage
442 ctx.AddCapability(spv::Capability::ImageGatherExtended);
443 ctx.AddCapability(spv::Capability::ImageQuery);
444 ctx.AddCapability(spv::Capability::SampledBuffer);
445}
446
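// Phi operands may reference values defined later in the module; EmitPhi emits them as deferred
// phis and this pass resolves each operand by walking every phi instruction across the blocks in
// emission order.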
447void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
448 auto inst{program.blocks.front()->begin()};
449 size_t block_index{0};
450 ctx.PatchDeferredPhi([&](size_t phi_arg) {
451 if (phi_arg == 0) {
452 ++inst;
453 if (inst == program.blocks[block_index]->end() ||
454 inst->GetOpcode() != IR::Opcode::Phi) {
455 do {
456 ++block_index;
457 inst = program.blocks[block_index]->begin();
458 } while (inst->GetOpcode() != IR::Opcode::Phi);
459 }
460 }
461 return ctx.Def(inst->Arg(phi_arg));
462 });
463}
464} // Anonymous namespace
465
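// Top-level entry point: emit the main function, declare the entry point and execution modes,
// enable float controls and capabilities supported by the host profile, resolve deferred phi
// operands and assemble the final SPIR-V binary.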
466std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
467 IR::Program& program, Bindings& bindings) {
468 EmitContext ctx{profile, runtime_info, program, bindings};
469 const Id main{DefineMain(ctx, program)};
470 DefineEntryPoint(program, ctx, main);
471 if (profile.support_float_controls) {
472 ctx.AddExtension("SPV_KHR_float_controls");
473 SetupDenormControl(profile, program, ctx, main);
474 SetupSignedNanCapabilities(profile, program, ctx, main);
475 }
476 SetupCapabilities(profile, program.info, ctx);
477 PatchPhiNodes(program, ctx);
478 return ctx.Assemble();
479}
480
481Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
482 const size_t num_args{inst->NumArgs()};
483 boost::container::small_vector<Id, 32> blocks;
484 blocks.reserve(num_args);
485 for (size_t index = 0; index < num_args; ++index) {
486 blocks.push_back(inst->PhiBlock(index)->Definition<Id>());
487 }
488 // The type of a phi instruction is stored in its flags
489 const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())};
490 return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size()));
491}
492
493void EmitVoid(EmitContext&) {}
494
495Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
496 const Id id{ctx.Def(value)};
497 if (!Sirit::ValidId(id)) {
498 throw NotImplementedException("Forward identity declaration");
499 }
500 return id;
501}
502
503Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
504 const Id id{ctx.Def(value)};
505 if (!Sirit::ValidId(id)) {
506 throw NotImplementedException("Forward identity declaration");
507 }
508 return id;
509}
510
511void EmitReference(EmitContext&) {}
512
513void EmitPhiMove(EmitContext&) {
514 throw LogicError("Unreachable instruction");
515}
516
517void EmitGetZeroFromOp(EmitContext&) {
518 throw LogicError("Unreachable instruction");
519}
520
521void EmitGetSignFromOp(EmitContext&) {
522 throw LogicError("Unreachable instruction");
523}
524
525void EmitGetCarryFromOp(EmitContext&) {
526 throw LogicError("Unreachable instruction");
527}
528
529void EmitGetOverflowFromOp(EmitContext&) {
530 throw LogicError("Unreachable instruction");
531}
532
533void EmitGetSparseFromOp(EmitContext&) {
534 throw LogicError("Unreachable instruction");
535}
536
537void EmitGetInBoundsFromOp(EmitContext&) {
538 throw LogicError("Unreachable instruction");
539}
540
541} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
new file mode 100644
index 000000000..db0c935fe
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -0,0 +1,27 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include <sirit/sirit.h>
10
11#include "common/common_types.h"
12#include "shader_recompiler/backend/bindings.h"
13#include "shader_recompiler/backend/spirv/emit_context.h"
14#include "shader_recompiler/frontend/ir/program.h"
15#include "shader_recompiler/profile.h"
16
17namespace Shader::Backend::SPIRV {
18
19[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
20 IR::Program& program, Bindings& bindings);
21
22[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
23 Bindings binding;
24 return EmitSPIRV(profile, {}, program, binding);
25}
26
27} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
new file mode 100644
index 000000000..9af8bb9e1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -0,0 +1,448 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
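// Workgroup shared memory is addressed as an array of u32 words; guest byte offsets are shifted
// down to word indices before building the access chain.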
10Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
11 const Id shift_id{ctx.Const(2U)};
12 Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
13 if (index_offset > 0) {
14 index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
15 }
16 return ctx.profile.support_explicit_workgroup_layout
17 ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
18 : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
19}
20
21Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
22 if (offset.IsImmediate()) {
23 const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
24 return ctx.Const(imm_offset);
25 }
26 const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
27 const Id index{ctx.Def(offset)};
28 if (shift == 0) {
29 return index;
30 }
31 const Id shift_id{ctx.Const(shift)};
32 return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
33}
34
35Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def,
36 Id StorageDefinitions::*member_ptr, const IR::Value& binding,
37 const IR::Value& offset, size_t element_size) {
38 if (!binding.IsImmediate()) {
39 throw NotImplementedException("Dynamic storage buffer indexing");
40 }
41 const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
42 const Id index{StorageIndex(ctx, offset, element_size)};
43 return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
44}
45
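// Every atomic below uses Device scope with relaxed (zero) memory semantics.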
46std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
47 const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
48 const Id semantics{ctx.u32_zero_value};
49 return {scope, semantics};
50}
51
52Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
53 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
54 const Id pointer{SharedPointer(ctx, offset)};
55 const auto [scope, semantics]{AtomicArgs(ctx)};
56 return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
57}
58
59Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
60 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
61 const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding,
62 offset, sizeof(u32))};
63 const auto [scope, semantics]{AtomicArgs(ctx)};
64 return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
65}
66
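// 64-bit storage atomics fall back to a non-atomic load/modify/store on a uvec2 view when the
// host lacks Int64Atomics; the LOG_ERROR below flags that loss of atomicity.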
67Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
68 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
69 Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
70 if (ctx.profile.support_int64_atomics) {
71 const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
72 binding, offset, sizeof(u64))};
73 const auto [scope, semantics]{AtomicArgs(ctx)};
74 return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
75 }
76 LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
77 const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
78 binding, offset, sizeof(u32[2]))};
79 const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
80 const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)};
81 ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
82 return original_value;
83}
84} // Anonymous namespace
85
86Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
87 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
88}
89
90Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
91 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
92}
93
94Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
95 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
96}
97
98Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
99 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
100}
101
102Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
103 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
104}
105
106Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) {
107 const Id shift_id{ctx.Const(2U)};
108 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
109 return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value);
110}
111
112Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) {
113 const Id shift_id{ctx.Const(2U)};
114 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
115 return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value);
116}
117
118Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
119 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
120}
121
122Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
123 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
124}
125
126Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
127 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
128}
129
130Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) {
131 return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange);
132}
133
134Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
135 if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) {
136 const Id shift_id{ctx.Const(3U)};
137 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
138 const Id pointer{
139 ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
140 const auto [scope, semantics]{AtomicArgs(ctx)};
141 return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
142 }
143 LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
144 const Id pointer_1{SharedPointer(ctx, offset, 0)};
145 const Id pointer_2{SharedPointer(ctx, offset, 1)};
146 const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
147 const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
148 const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
149 ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
150 ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
151 return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
152}
153
154Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
155 Id value) {
156 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
157}
158
159Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
160 Id value) {
161 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin);
162}
163
164Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
165 Id value) {
166 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin);
167}
168
169Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
170 Id value) {
171 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax);
172}
173
174Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
175 Id value) {
176 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax);
177}
178
179Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
180 Id value) {
181 const Id ssbo{ctx.ssbos[binding.U32()].U32};
182 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
183 return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
184}
185
186Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
187 Id value) {
188 const Id ssbo{ctx.ssbos[binding.U32()].U32};
189 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
190 return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
191}
192
193Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
194 Id value) {
195 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd);
196}
197
198Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
199 Id value) {
200 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr);
201}
202
203Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
204 Id value) {
205 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor);
206}
207
208Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
209 Id value) {
210 return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange);
211}
212
213Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
214 Id value) {
215 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd,
216 &Sirit::Module::OpIAdd);
217}
218
219Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
220 Id value) {
221 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin,
222 &Sirit::Module::OpSMin);
223}
224
225Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
226 Id value) {
227 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin,
228 &Sirit::Module::OpUMin);
229}
230
231Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
232 Id value) {
233 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax,
234 &Sirit::Module::OpSMax);
235}
236
237Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
238 Id value) {
239 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax,
240 &Sirit::Module::OpUMax);
241}
242
243Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
244 Id value) {
245 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd,
246 &Sirit::Module::OpBitwiseAnd);
247}
248
249Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
250 Id value) {
251 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr,
252 &Sirit::Module::OpBitwiseOr);
253}
254
255Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
256 Id value) {
257 return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor,
258 &Sirit::Module::OpBitwiseXor);
259}
260
261Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
262 Id value) {
263 if (ctx.profile.support_int64_atomics) {
264 const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
265 binding, offset, sizeof(u64))};
266 const auto [scope, semantics]{AtomicArgs(ctx)};
267 return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
268 }
269 LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
270 const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
271 binding, offset, sizeof(u32[2]))};
272 const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
273 ctx.OpStore(pointer, value);
274 return original;
275}
276
277Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
278 Id value) {
279 const Id ssbo{ctx.ssbos[binding.U32()].U32};
280 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
281 return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
282}
283
284Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
285 Id value) {
286 const Id ssbo{ctx.ssbos[binding.U32()].U32};
287 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
288 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
289 return ctx.OpBitcast(ctx.U32[1], result);
290}
291
292Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
293 Id value) {
294 const Id ssbo{ctx.ssbos[binding.U32()].U32};
295 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
296 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
297 return ctx.OpPackHalf2x16(ctx.U32[1], result);
298}
299
300Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
301 Id value) {
302 const Id ssbo{ctx.ssbos[binding.U32()].U32};
303 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
304 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
305 return ctx.OpBitcast(ctx.U32[1], result);
306}
307
308Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
309 Id value) {
310 const Id ssbo{ctx.ssbos[binding.U32()].U32};
311 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
312 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
313 return ctx.OpPackHalf2x16(ctx.U32[1], result);
314}
315
316Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
317 Id value) {
318 const Id ssbo{ctx.ssbos[binding.U32()].U32};
319 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
320 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
321 return ctx.OpBitcast(ctx.U32[1], result);
322}
323
324Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
325 Id value) {
326 const Id ssbo{ctx.ssbos[binding.U32()].U32};
327 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
328 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
329 return ctx.OpPackHalf2x16(ctx.U32[1], result);
330}
331
332Id EmitGlobalAtomicIAdd32(EmitContext&) {
333 throw NotImplementedException("SPIR-V Instruction");
334}
335
336Id EmitGlobalAtomicSMin32(EmitContext&) {
337 throw NotImplementedException("SPIR-V Instruction");
338}
339
340Id EmitGlobalAtomicUMin32(EmitContext&) {
341 throw NotImplementedException("SPIR-V Instruction");
342}
343
344Id EmitGlobalAtomicSMax32(EmitContext&) {
345 throw NotImplementedException("SPIR-V Instruction");
346}
347
348Id EmitGlobalAtomicUMax32(EmitContext&) {
349 throw NotImplementedException("SPIR-V Instruction");
350}
351
352Id EmitGlobalAtomicInc32(EmitContext&) {
353 throw NotImplementedException("SPIR-V Instruction");
354}
355
356Id EmitGlobalAtomicDec32(EmitContext&) {
357 throw NotImplementedException("SPIR-V Instruction");
358}
359
360Id EmitGlobalAtomicAnd32(EmitContext&) {
361 throw NotImplementedException("SPIR-V Instruction");
362}
363
364Id EmitGlobalAtomicOr32(EmitContext&) {
365 throw NotImplementedException("SPIR-V Instruction");
366}
367
368Id EmitGlobalAtomicXor32(EmitContext&) {
369 throw NotImplementedException("SPIR-V Instruction");
370}
371
372Id EmitGlobalAtomicExchange32(EmitContext&) {
373 throw NotImplementedException("SPIR-V Instruction");
374}
375
376Id EmitGlobalAtomicIAdd64(EmitContext&) {
377 throw NotImplementedException("SPIR-V Instruction");
378}
379
380Id EmitGlobalAtomicSMin64(EmitContext&) {
381 throw NotImplementedException("SPIR-V Instruction");
382}
383
384Id EmitGlobalAtomicUMin64(EmitContext&) {
385 throw NotImplementedException("SPIR-V Instruction");
386}
387
388Id EmitGlobalAtomicSMax64(EmitContext&) {
389 throw NotImplementedException("SPIR-V Instruction");
390}
391
392Id EmitGlobalAtomicUMax64(EmitContext&) {
393 throw NotImplementedException("SPIR-V Instruction");
394}
395
396Id EmitGlobalAtomicInc64(EmitContext&) {
397 throw NotImplementedException("SPIR-V Instruction");
398}
399
400Id EmitGlobalAtomicDec64(EmitContext&) {
401 throw NotImplementedException("SPIR-V Instruction");
402}
403
404Id EmitGlobalAtomicAnd64(EmitContext&) {
405 throw NotImplementedException("SPIR-V Instruction");
406}
407
408Id EmitGlobalAtomicOr64(EmitContext&) {
409 throw NotImplementedException("SPIR-V Instruction");
410}
411
412Id EmitGlobalAtomicXor64(EmitContext&) {
413 throw NotImplementedException("SPIR-V Instruction");
414}
415
416Id EmitGlobalAtomicExchange64(EmitContext&) {
417 throw NotImplementedException("SPIR-V Instruction");
418}
419
420Id EmitGlobalAtomicAddF32(EmitContext&) {
421 throw NotImplementedException("SPIR-V Instruction");
422}
423
424Id EmitGlobalAtomicAddF16x2(EmitContext&) {
425 throw NotImplementedException("SPIR-V Instruction");
426}
427
428Id EmitGlobalAtomicAddF32x2(EmitContext&) {
429 throw NotImplementedException("SPIR-V Instruction");
430}
431
432Id EmitGlobalAtomicMinF16x2(EmitContext&) {
433 throw NotImplementedException("SPIR-V Instruction");
434}
435
436Id EmitGlobalAtomicMinF32x2(EmitContext&) {
437 throw NotImplementedException("SPIR-V Instruction");
438}
439
440Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
441 throw NotImplementedException("SPIR-V Instruction");
442}
443
444Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
445 throw NotImplementedException("SPIR-V Instruction");
446}
447
448} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
new file mode 100644
index 000000000..e0b52a001
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10namespace {
11void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
12 const auto semantics{
13 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
14 spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
15 spv::MemorySemanticsMask::ImageMemory};
16 ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics)));
17}
18} // Anonymous namespace
19
20void EmitBarrier(EmitContext& ctx) {
21 const auto execution{spv::Scope::Workgroup};
22 const auto memory{spv::Scope::Workgroup};
23 const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
24 spv::MemorySemanticsMask::WorkgroupMemory};
25 ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)),
26 ctx.Const(static_cast<u32>(memory)),
27 ctx.Const(static_cast<u32>(memory_semantics)));
28}
29
30void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
31 MemoryBarrier(ctx, spv::Scope::Workgroup);
32}
33
34void EmitDeviceMemoryBarrier(EmitContext& ctx) {
35 MemoryBarrier(ctx, spv::Scope::Device);
36}
37
38} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
new file mode 100644
index 000000000..bb11f4f4e
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10void EmitBitCastU16F16(EmitContext&) {
11 throw NotImplementedException("SPIR-V Instruction");
12}
13
14Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
15 return ctx.OpBitcast(ctx.U32[1], value);
16}
17
18void EmitBitCastU64F64(EmitContext&) {
19 throw NotImplementedException("SPIR-V Instruction");
20}
21
22void EmitBitCastF16U16(EmitContext&) {
23 throw NotImplementedException("SPIR-V Instruction");
24}
25
26Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
27 return ctx.OpBitcast(ctx.F32[1], value);
28}
29
30void EmitBitCastF64U64(EmitContext&) {
31 throw NotImplementedException("SPIR-V Instruction");
32}
33
34Id EmitPackUint2x32(EmitContext& ctx, Id value) {
35 return ctx.OpBitcast(ctx.U64, value);
36}
37
38Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
39 return ctx.OpBitcast(ctx.U32[2], value);
40}
41
42Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
43 return ctx.OpBitcast(ctx.U32[1], value);
44}
45
46Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
47 return ctx.OpBitcast(ctx.F16[2], value);
48}
49
50Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
51 return ctx.OpPackHalf2x16(ctx.U32[1], value);
52}
53
54Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
55 return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
56}
57
58Id EmitPackDouble2x32(EmitContext& ctx, Id value) {
59 return ctx.OpBitcast(ctx.F64[1], value);
60}
61
62Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) {
63 return ctx.OpBitcast(ctx.U32[2], value);
64}
65
66} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
new file mode 100644
index 000000000..10ff4ecab
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -0,0 +1,155 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10
11Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
12 return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
13}
14
15Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
16 return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
17}
18
19Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
20 return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
21}
22
23Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
24 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
25}
26
27Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
28 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
29}
30
31Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
32 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
33}
34
35Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
36 return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
37}
38
39Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
40 return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
41}
42
43Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
44 return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
45}
46
47Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
48 return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
49}
50
51Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
52 return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
53}
54
55Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
56 return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
57}
58
59Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
60 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
61}
62
63Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
64 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
65}
66
67Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
68 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
69}
70
71Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
72 return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
73}
74
75Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
76 return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
77}
78
79Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
80 return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
81}
82
83Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
84 return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
85}
86
87Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
88 return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
89}
90
91Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
92 return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
93}
94
95Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
96 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
97}
98
99Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
100 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
101}
102
103Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
104 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
105}
106
107Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
108 return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
109}
110
111Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
112 return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
113}
114
115Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
116 return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
117}
118
119void EmitCompositeConstructF64x2(EmitContext&) {
120 throw NotImplementedException("SPIR-V Instruction");
121}
122
123void EmitCompositeConstructF64x3(EmitContext&) {
124 throw NotImplementedException("SPIR-V Instruction");
125}
126
127void EmitCompositeConstructF64x4(EmitContext&) {
128 throw NotImplementedException("SPIR-V Instruction");
129}
130
131void EmitCompositeExtractF64x2(EmitContext&) {
132 throw NotImplementedException("SPIR-V Instruction");
133}
134
135void EmitCompositeExtractF64x3(EmitContext&) {
136 throw NotImplementedException("SPIR-V Instruction");
137}
138
139void EmitCompositeExtractF64x4(EmitContext&) {
140 throw NotImplementedException("SPIR-V Instruction");
141}
142
143Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
144 return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
145}
146
147Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
148 return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
149}
150
151Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
152 return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
153}
154
155} // namespace Shader::Backend::SPIRV
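
Aside: every insert helper above relies on OpCompositeInsert producing a fresh composite with one element replaced, never mutating its input, which is what preserves SSA value semantics in the emitted module. A minimal host-side model of that behavior (illustrative only, using std::array in place of SPIR-V vectors; not emitter code):

#include <array>
#include <cstddef>
#include <cstdio>

// Returns a new composite with one element replaced; the source stays untouched.
template <typename T, std::size_t N>
std::array<T, N> CompositeInsert(std::array<T, N> composite, T object, std::size_t index) {
    composite[index] = object; // operate on the copy, mirroring SSA value semantics
    return composite;
}

int main() {
    const std::array<float, 4> vec{1.0f, 2.0f, 3.0f, 4.0f};
    const std::array<float, 4> result{CompositeInsert(vec, 9.0f, std::size_t{2})};
    std::printf("%g %g %g %g (source untouched: %g)\n", result[0], result[1], result[2],
                result[3], vec[2]);
}

The extract helpers are the read-side counterpart: OpCompositeExtract pulls a single scalar lane out without touching the source vector.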
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
new file mode 100644
index 000000000..fb8c02a77
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -0,0 +1,505 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6#include <utility>
7
8#include "shader_recompiler/backend/spirv/emit_spirv.h"
9#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
10
11namespace Shader::Backend::SPIRV {
12namespace {
13struct AttrInfo {
14 Id pointer;
15 Id id;
16 bool needs_cast;
17};
18
19std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
20 const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
21 switch (type) {
22 case AttributeType::Float:
23 return AttrInfo{ctx.input_f32, ctx.F32[1], false};
24 case AttributeType::UnsignedInt:
25 return AttrInfo{ctx.input_u32, ctx.U32[1], true};
26 case AttributeType::SignedInt:
27 return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
28 case AttributeType::Disabled:
29 return std::nullopt;
30 }
31 throw InvalidArgument("Invalid attribute type {}", type);
32}
33
34template <typename... Args>
35Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
36 switch (ctx.stage) {
37 case Stage::TessellationControl:
38 case Stage::TessellationEval:
39 case Stage::Geometry:
40 return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...);
41 default:
42 return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...);
43 }
44}
45
46template <typename... Args>
47Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
48 if (ctx.stage == Stage::TessellationControl) {
49 const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)};
50 return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...);
51 } else {
52 return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...);
53 }
54}
55
56struct OutAttr {
57 OutAttr(Id pointer_) : pointer{pointer_} {}
58 OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {}
59
60 Id pointer{};
61 Id type{};
62};
63
64std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
65 if (IR::IsGeneric(attr)) {
66 const u32 index{IR::GenericAttributeIndex(attr)};
67 const u32 element{IR::GenericAttributeElement(attr)};
68 const GenericElementInfo& info{ctx.output_generics.at(index).at(element)};
69 if (info.num_components == 1) {
70 return info.id;
71 } else {
72 const u32 index_element{element - info.first_element};
73 const Id index_id{ctx.Const(index_element)};
74 return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
75 }
76 }
77 switch (attr) {
78 case IR::Attribute::PointSize:
79 return ctx.output_point_size;
80 case IR::Attribute::PositionX:
81 case IR::Attribute::PositionY:
82 case IR::Attribute::PositionZ:
83 case IR::Attribute::PositionW: {
84 const u32 element{static_cast<u32>(attr) % 4};
85 const Id element_id{ctx.Const(element)};
86 return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
87 }
88 case IR::Attribute::ClipDistance0:
89 case IR::Attribute::ClipDistance1:
90 case IR::Attribute::ClipDistance2:
91 case IR::Attribute::ClipDistance3:
92 case IR::Attribute::ClipDistance4:
93 case IR::Attribute::ClipDistance5:
94 case IR::Attribute::ClipDistance6:
95 case IR::Attribute::ClipDistance7: {
96 const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
97 const u32 index{static_cast<u32>(attr) - base};
98 const Id clip_num{ctx.Const(index)};
99 return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num);
100 }
101 case IR::Attribute::Layer:
102 if (ctx.profile.support_viewport_index_layer_non_geometry ||
103 ctx.stage == Shader::Stage::Geometry) {
104 return OutAttr{ctx.layer, ctx.U32[1]};
105 }
106 return std::nullopt;
107 case IR::Attribute::ViewportIndex:
108 if (ctx.profile.support_viewport_index_layer_non_geometry ||
109 ctx.stage == Shader::Stage::Geometry) {
110 return OutAttr{ctx.viewport_index, ctx.U32[1]};
111 }
112 return std::nullopt;
113 case IR::Attribute::ViewportMask:
114 if (!ctx.profile.support_viewport_mask) {
115 return std::nullopt;
116 }
117 return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value),
118 ctx.U32[1]};
119 default:
120 throw NotImplementedException("Write attribute {}", attr);
121 }
122}
123
124Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
125 const IR::Value& binding, const IR::Value& offset) {
126 if (!binding.IsImmediate()) {
127 throw NotImplementedException("Constant buffer indexing");
128 }
129 const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
130 const Id uniform_type{ctx.uniform_types.*member_ptr};
131 if (!offset.IsImmediate()) {
132 Id index{ctx.Def(offset)};
133 if (element_size > 1) {
134 const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
135 const Id shift{ctx.Const(log2_element_size)};
136 index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
137 }
138 const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
139 return ctx.OpLoad(result_type, access_chain);
140 }
141 // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at offset 6 will read offset 4)
142 const Id imm_offset{ctx.Const(offset.U32() / element_size)};
143 const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
144 return ctx.OpLoad(result_type, access_chain);
145}
146
147Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
148 return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
149}
150
151Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
152 return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
153}
154
155Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
156 if (offset.IsImmediate()) {
157 const u32 element{(offset.U32() / 4) % 4 + index_offset};
158 return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
159 }
160 const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
161 Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
162 if (index_offset > 0) {
163 element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
164 }
165 return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element);
166}
167} // Anonymous namespace
168
169void EmitGetRegister(EmitContext&) {
170 throw LogicError("Unreachable instruction");
171}
172
173void EmitSetRegister(EmitContext&) {
174 throw LogicError("Unreachable instruction");
175}
176
177void EmitGetPred(EmitContext&) {
178 throw LogicError("Unreachable instruction");
179}
180
181void EmitSetPred(EmitContext&) {
182 throw LogicError("Unreachable instruction");
183}
184
185void EmitSetGotoVariable(EmitContext&) {
186 throw LogicError("Unreachable instruction");
187}
188
189void EmitGetGotoVariable(EmitContext&) {
190 throw LogicError("Unreachable instruction");
191}
192
193void EmitSetIndirectBranchVariable(EmitContext&) {
194 throw LogicError("Unreachable instruction");
195}
196
197void EmitGetIndirectBranchVariable(EmitContext&) {
198 throw LogicError("Unreachable instruction");
199}
200
201Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
202 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
203 const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
204 return ctx.OpUConvert(ctx.U32[1], load);
205 }
206 Id element{};
207 if (ctx.profile.support_descriptor_aliasing) {
208 element = GetCbufU32(ctx, binding, offset);
209 } else {
210 const Id vector{GetCbufU32x4(ctx, binding, offset)};
211 element = GetCbufElement(ctx, vector, offset, 0u);
212 }
213 const Id bit_offset{ctx.BitOffset8(offset)};
214 return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
215}
216
217Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
218 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
219 const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
220 return ctx.OpSConvert(ctx.U32[1], load);
221 }
222 Id element{};
223 if (ctx.profile.support_descriptor_aliasing) {
224 element = GetCbufU32(ctx, binding, offset);
225 } else {
226 const Id vector{GetCbufU32x4(ctx, binding, offset)};
227 element = GetCbufElement(ctx, vector, offset, 0u);
228 }
229 const Id bit_offset{ctx.BitOffset8(offset)};
230 return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
231}
232
233Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
234 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
235 const Id load{
236 GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
237 return ctx.OpUConvert(ctx.U32[1], load);
238 }
239 Id element{};
240 if (ctx.profile.support_descriptor_aliasing) {
241 element = GetCbufU32(ctx, binding, offset);
242 } else {
243 const Id vector{GetCbufU32x4(ctx, binding, offset)};
244 element = GetCbufElement(ctx, vector, offset, 0u);
245 }
246 const Id bit_offset{ctx.BitOffset16(offset)};
247 return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
248}
249
250Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
251 if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
252 const Id load{
253 GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
254 return ctx.OpSConvert(ctx.U32[1], load);
255 }
256 Id element{};
257 if (ctx.profile.support_descriptor_aliasing) {
258 element = GetCbufU32(ctx, binding, offset);
259 } else {
260 const Id vector{GetCbufU32x4(ctx, binding, offset)};
261 element = GetCbufElement(ctx, vector, offset, 0u);
262 }
263 const Id bit_offset{ctx.BitOffset16(offset)};
264 return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
265}
266
267Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
268 if (ctx.profile.support_descriptor_aliasing) {
269 return GetCbufU32(ctx, binding, offset);
270 } else {
271 const Id vector{GetCbufU32x4(ctx, binding, offset)};
272 return GetCbufElement(ctx, vector, offset, 0u);
273 }
274}
275
276Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
277 if (ctx.profile.support_descriptor_aliasing) {
278 return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
279 } else {
280 const Id vector{GetCbufU32x4(ctx, binding, offset)};
281 return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
282 }
283}
284
285Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
286 if (ctx.profile.support_descriptor_aliasing) {
287 return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
288 offset);
289 } else {
290 const Id vector{GetCbufU32x4(ctx, binding, offset)};
291 return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),
292 GetCbufElement(ctx, vector, offset, 1u));
293 }
294}
295
296Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
297 const u32 element{static_cast<u32>(attr) % 4};
298 if (IR::IsGeneric(attr)) {
299 const u32 index{IR::GenericAttributeIndex(attr)};
300 const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
301 if (!type) {
302 // Attribute is disabled
303 return ctx.Const(element == 3 ? 1.0f : 0.0f);
304 }
305 if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
306 // Varying component is not written
307 return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
308 }
309 const Id generic_id{ctx.input_generics.at(index)};
310 const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
311 const Id value{ctx.OpLoad(type->id, pointer)};
312 return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
313 }
314 switch (attr) {
315 case IR::Attribute::PrimitiveId:
316 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
317 case IR::Attribute::PositionX:
318 case IR::Attribute::PositionY:
319 case IR::Attribute::PositionZ:
320 case IR::Attribute::PositionW:
321 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
322 ctx.Const(element)));
323 case IR::Attribute::InstanceId:
324 if (ctx.profile.support_vertex_instance_id) {
325 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
326 } else {
327 const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
328 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
329 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
330 }
331 case IR::Attribute::VertexId:
332 if (ctx.profile.support_vertex_instance_id) {
333 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id));
334 } else {
335 const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
336 const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
337 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
338 }
339 case IR::Attribute::FrontFace:
340 return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
341 ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
342 case IR::Attribute::PointSpriteS:
343 return ctx.OpLoad(ctx.F32[1],
344 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
345 case IR::Attribute::PointSpriteT:
346 return ctx.OpLoad(ctx.F32[1],
347 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U)));
348 case IR::Attribute::TessellationEvaluationPointU:
349 return ctx.OpLoad(ctx.F32[1],
350 ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
351 case IR::Attribute::TessellationEvaluationPointV:
352 return ctx.OpLoad(ctx.F32[1],
353 ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U)));
354
355 default:
356 throw NotImplementedException("Read attribute {}", attr);
357 }
358}
359
360void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
361 const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
362 if (!output) {
363 return;
364 }
365 if (Sirit::ValidId(output->type)) {
366 value = ctx.OpBitcast(output->type, value);
367 }
368 ctx.OpStore(output->pointer, value);
369}
370
371Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) {
372 switch (ctx.stage) {
373 case Stage::TessellationControl:
374 case Stage::TessellationEval:
375 case Stage::Geometry:
376 return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex);
377 default:
378 return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
379 }
380}
381
382void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) {
383 ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
384}
385
386Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
387 if (!IR::IsGeneric(patch)) {
388 throw NotImplementedException("Non-generic patch load");
389 }
390 const u32 index{IR::GenericPatchIndex(patch)};
391 const Id element{ctx.Const(IR::GenericPatchElement(patch))};
392 const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32};
393 const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
394 return ctx.OpLoad(ctx.F32[1], pointer);
395}
396
397void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
398 const Id pointer{[&] {
399 if (IR::IsGeneric(patch)) {
400 const u32 index{IR::GenericPatchIndex(patch)};
401 const Id element{ctx.Const(IR::GenericPatchElement(patch))};
402 return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
403 }
404 switch (patch) {
405 case IR::Patch::TessellationLodLeft:
406 case IR::Patch::TessellationLodRight:
407 case IR::Patch::TessellationLodTop:
408 case IR::Patch::TessellationLodBottom: {
409 const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
410 const Id index_id{ctx.Const(index)};
411 return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
412 }
413 case IR::Patch::TessellationLodInteriorU:
414 return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
415 ctx.u32_zero_value);
416 case IR::Patch::TessellationLodInteriorV:
417 return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u));
418 default:
419 throw NotImplementedException("Patch {}", patch);
420 }
421 }()};
422 ctx.OpStore(pointer, value);
423}
424
425void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
426 const Id component_id{ctx.Const(component)};
427 const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
428 ctx.OpStore(pointer, value);
429}
430
431void EmitSetSampleMask(EmitContext& ctx, Id value) {
432 ctx.OpStore(ctx.sample_mask, value);
433}
434
435void EmitSetFragDepth(EmitContext& ctx, Id value) {
436 ctx.OpStore(ctx.frag_depth, value);
437}
438
439void EmitGetZFlag(EmitContext&) {
440 throw NotImplementedException("SPIR-V Instruction");
441}
442
443void EmitGetSFlag(EmitContext&) {
444 throw NotImplementedException("SPIR-V Instruction");
445}
446
447void EmitGetCFlag(EmitContext&) {
448 throw NotImplementedException("SPIR-V Instruction");
449}
450
451void EmitGetOFlag(EmitContext&) {
452 throw NotImplementedException("SPIR-V Instruction");
453}
454
455void EmitSetZFlag(EmitContext&) {
456 throw NotImplementedException("SPIR-V Instruction");
457}
458
459void EmitSetSFlag(EmitContext&) {
460 throw NotImplementedException("SPIR-V Instruction");
461}
462
463void EmitSetCFlag(EmitContext&) {
464 throw NotImplementedException("SPIR-V Instruction");
465}
466
467void EmitSetOFlag(EmitContext&) {
468 throw NotImplementedException("SPIR-V Instruction");
469}
470
471Id EmitWorkgroupId(EmitContext& ctx) {
472 return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id);
473}
474
475Id EmitLocalInvocationId(EmitContext& ctx) {
476 return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
477}
478
479Id EmitInvocationId(EmitContext& ctx) {
480 return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
481}
482
483Id EmitSampleId(EmitContext& ctx) {
484 return ctx.OpLoad(ctx.U32[1], ctx.sample_id);
485}
486
487Id EmitIsHelperInvocation(EmitContext& ctx) {
488 return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation);
489}
490
491Id EmitYDirection(EmitContext& ctx) {
492 return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
493}
494
495Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
496 const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
497 return ctx.OpLoad(ctx.U32[1], pointer);
498}
499
500void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
501 const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
502 ctx.OpStore(pointer, value);
503}
504
505} // namespace Shader::Backend::SPIRV
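
Aside: when descriptor aliasing is unavailable, the constant-buffer getters above fall back to loading a whole uvec4 and selecting a lane with GetCbufElement. The arithmetic is easy to check on the host; the sketch below is illustrative only (Resolve and CbufAddress are local names, not emitter API):

#include <cstdint>
#include <cstdio>

struct CbufAddress {
    std::uint32_t word;    // aligned index into the uniform array (offset / element_size)
    std::uint32_t element; // lane within a uvec4 when descriptor aliasing is unavailable
};

static CbufAddress Resolve(std::uint32_t byte_offset, std::uint32_t element_size) {
    // GetCbuf divides the immediate byte offset down to its aligned element;
    // GetCbufElement picks the lane with (offset / 4) % 4.
    return {byte_offset / element_size, (byte_offset / 4) % 4};
}

int main() {
    // Matches the comment in GetCbuf: an LDC.U32 at byte offset 6 reads the aligned offset 4.
    const CbufAddress addr{Resolve(6, sizeof(std::uint32_t))};
    std::printf("word=%u element=%u\n", addr.word, addr.element);
}

For non-immediate offsets the same math is emitted inline: a right shift by 2 followed by a mask with 3, as seen in GetCbufElement.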
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
new file mode 100644
index 000000000..d33486f28
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10void EmitJoin(EmitContext&) {
11 throw NotImplementedException("Join shouldn't be emitted");
12}
13
14void EmitDemoteToHelperInvocation(EmitContext& ctx) {
15 if (ctx.profile.support_demote_to_helper_invocation) {
16 ctx.OpDemoteToHelperInvocationEXT();
17 } else {
18 const Id kill_label{ctx.OpLabel()};
19 const Id impossible_label{ctx.OpLabel()};
20 ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone);
21 ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label);
22 ctx.AddLabel(kill_label);
23 ctx.OpKill();
24 ctx.AddLabel(impossible_label);
25 }
26}
27
28} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
new file mode 100644
index 000000000..fd42b7a16
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -0,0 +1,269 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10Id ExtractU16(EmitContext& ctx, Id value) {
11 if (ctx.profile.support_int16) {
12 return ctx.OpUConvert(ctx.U16, value);
13 } else {
14 return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
15 }
16}
17
18Id ExtractS16(EmitContext& ctx, Id value) {
19 if (ctx.profile.support_int16) {
20 return ctx.OpSConvert(ctx.S16, value);
21 } else {
22 return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
23 }
24}
25
26Id ExtractU8(EmitContext& ctx, Id value) {
27 if (ctx.profile.support_int8) {
28 return ctx.OpUConvert(ctx.U8, value);
29 } else {
30 return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
31 }
32}
33
34Id ExtractS8(EmitContext& ctx, Id value) {
35 if (ctx.profile.support_int8) {
36 return ctx.OpSConvert(ctx.S8, value);
37 } else {
38 return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
39 }
40}
41} // Anonymous namespace
42
43Id EmitConvertS16F16(EmitContext& ctx, Id value) {
44 if (ctx.profile.support_int16) {
45 return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
46 } else {
47 return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
48 }
49}
50
51Id EmitConvertS16F32(EmitContext& ctx, Id value) {
52 if (ctx.profile.support_int16) {
53 return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
54 } else {
55 return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
56 }
57}
58
59Id EmitConvertS16F64(EmitContext& ctx, Id value) {
60 if (ctx.profile.support_int16) {
61 return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
62 } else {
63 return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
64 }
65}
66
67Id EmitConvertS32F16(EmitContext& ctx, Id value) {
68 return ctx.OpConvertFToS(ctx.U32[1], value);
69}
70
71Id EmitConvertS32F32(EmitContext& ctx, Id value) {
72 if (ctx.profile.has_broken_signed_operations) {
73 return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
74 } else {
75 return ctx.OpConvertFToS(ctx.U32[1], value);
76 }
77}
78
79Id EmitConvertS32F64(EmitContext& ctx, Id value) {
80 return ctx.OpConvertFToS(ctx.U32[1], value);
81}
82
83Id EmitConvertS64F16(EmitContext& ctx, Id value) {
84 return ctx.OpConvertFToS(ctx.U64, value);
85}
86
87Id EmitConvertS64F32(EmitContext& ctx, Id value) {
88 return ctx.OpConvertFToS(ctx.U64, value);
89}
90
91Id EmitConvertS64F64(EmitContext& ctx, Id value) {
92 return ctx.OpConvertFToS(ctx.U64, value);
93}
94
95Id EmitConvertU16F16(EmitContext& ctx, Id value) {
96 if (ctx.profile.support_int16) {
97 return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
98 } else {
99 return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
100 }
101}
102
103Id EmitConvertU16F32(EmitContext& ctx, Id value) {
104 if (ctx.profile.support_int16) {
105 return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
106 } else {
107 return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
108 }
109}
110
111Id EmitConvertU16F64(EmitContext& ctx, Id value) {
112 if (ctx.profile.support_int16) {
113 return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
114 } else {
115 return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
116 }
117}
118
119Id EmitConvertU32F16(EmitContext& ctx, Id value) {
120 return ctx.OpConvertFToU(ctx.U32[1], value);
121}
122
123Id EmitConvertU32F32(EmitContext& ctx, Id value) {
124 return ctx.OpConvertFToU(ctx.U32[1], value);
125}
126
127Id EmitConvertU32F64(EmitContext& ctx, Id value) {
128 return ctx.OpConvertFToU(ctx.U32[1], value);
129}
130
131Id EmitConvertU64F16(EmitContext& ctx, Id value) {
132 return ctx.OpConvertFToU(ctx.U64, value);
133}
134
135Id EmitConvertU64F32(EmitContext& ctx, Id value) {
136 return ctx.OpConvertFToU(ctx.U64, value);
137}
138
139Id EmitConvertU64F64(EmitContext& ctx, Id value) {
140 return ctx.OpConvertFToU(ctx.U64, value);
141}
142
143Id EmitConvertU64U32(EmitContext& ctx, Id value) {
144 return ctx.OpUConvert(ctx.U64, value);
145}
146
147Id EmitConvertU32U64(EmitContext& ctx, Id value) {
148 return ctx.OpUConvert(ctx.U32[1], value);
149}
150
151Id EmitConvertF16F32(EmitContext& ctx, Id value) {
152 return ctx.OpFConvert(ctx.F16[1], value);
153}
154
155Id EmitConvertF32F16(EmitContext& ctx, Id value) {
156 return ctx.OpFConvert(ctx.F32[1], value);
157}
158
159Id EmitConvertF32F64(EmitContext& ctx, Id value) {
160 return ctx.OpFConvert(ctx.F32[1], value);
161}
162
163Id EmitConvertF64F32(EmitContext& ctx, Id value) {
164 return ctx.OpFConvert(ctx.F64[1], value);
165}
166
167Id EmitConvertF16S8(EmitContext& ctx, Id value) {
168 return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value));
169}
170
171Id EmitConvertF16S16(EmitContext& ctx, Id value) {
172 return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value));
173}
174
175Id EmitConvertF16S32(EmitContext& ctx, Id value) {
176 return ctx.OpConvertSToF(ctx.F16[1], value);
177}
178
179Id EmitConvertF16S64(EmitContext& ctx, Id value) {
180 return ctx.OpConvertSToF(ctx.F16[1], value);
181}
182
183Id EmitConvertF16U8(EmitContext& ctx, Id value) {
184 return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value));
185}
186
187Id EmitConvertF16U16(EmitContext& ctx, Id value) {
188 return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value));
189}
190
191Id EmitConvertF16U32(EmitContext& ctx, Id value) {
192 return ctx.OpConvertUToF(ctx.F16[1], value);
193}
194
195Id EmitConvertF16U64(EmitContext& ctx, Id value) {
196 return ctx.OpConvertUToF(ctx.F16[1], value);
197}
198
199Id EmitConvertF32S8(EmitContext& ctx, Id value) {
200 return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value));
201}
202
203Id EmitConvertF32S16(EmitContext& ctx, Id value) {
204 return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value));
205}
206
207Id EmitConvertF32S32(EmitContext& ctx, Id value) {
208 if (ctx.profile.has_broken_signed_operations) {
209 value = ctx.OpBitcast(ctx.S32[1], value);
210 }
211 return ctx.OpConvertSToF(ctx.F32[1], value);
212}
213
214Id EmitConvertF32S64(EmitContext& ctx, Id value) {
215 return ctx.OpConvertSToF(ctx.F32[1], value);
216}
217
218Id EmitConvertF32U8(EmitContext& ctx, Id value) {
219 return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value));
220}
221
222Id EmitConvertF32U16(EmitContext& ctx, Id value) {
223 return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value));
224}
225
226Id EmitConvertF32U32(EmitContext& ctx, Id value) {
227 return ctx.OpConvertUToF(ctx.F32[1], value);
228}
229
230Id EmitConvertF32U64(EmitContext& ctx, Id value) {
231 return ctx.OpConvertUToF(ctx.F32[1], value);
232}
233
234Id EmitConvertF64S8(EmitContext& ctx, Id value) {
235 return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value));
236}
237
238Id EmitConvertF64S16(EmitContext& ctx, Id value) {
239 return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value));
240}
241
242Id EmitConvertF64S32(EmitContext& ctx, Id value) {
243 if (ctx.profile.has_broken_signed_operations) {
244 value = ctx.OpBitcast(ctx.S32[1], value);
245 }
246 return ctx.OpConvertSToF(ctx.F64[1], value);
247}
248
249Id EmitConvertF64S64(EmitContext& ctx, Id value) {
250 return ctx.OpConvertSToF(ctx.F64[1], value);
251}
252
253Id EmitConvertF64U8(EmitContext& ctx, Id value) {
254 return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value));
255}
256
257Id EmitConvertF64U16(EmitContext& ctx, Id value) {
258 return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value));
259}
260
261Id EmitConvertF64U32(EmitContext& ctx, Id value) {
262 return ctx.OpConvertUToF(ctx.F64[1], value);
263}
264
265Id EmitConvertF64U64(EmitContext& ctx, Id value) {
266 return ctx.OpConvertUToF(ctx.F64[1], value);
267}
268
269} // namespace Shader::Backend::SPIRV
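
Aside: the Extract helpers above matter on devices without the Int8/Int16 capabilities — values stay in 32-bit registers and narrowing is emulated with bitfield extracts. A rough host-side equivalent of the 16-bit pair (illustrative only; these free functions just mirror the emitter helpers):

#include <cstdint>
#include <cstdio>

static std::uint32_t ExtractU16(std::uint32_t value) {
    return value & 0xffffu; // OpBitFieldUExtract(value, 0, 16)
}

static std::uint32_t ExtractS16(std::uint32_t value) {
    // OpBitFieldSExtract(value, 0, 16): sign-extend the low 16 bits into the full register.
    return static_cast<std::uint32_t>(static_cast<std::int32_t>(value << 16) >> 16);
}

int main() {
    std::printf("u16=%08x s16=%08x\n", ExtractU16(0x0001ffffu), ExtractS16(0x0001ffffu));
    // Prints: u16=0000ffff s16=ffffffff
}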
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
new file mode 100644
index 000000000..61cf25f9c
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -0,0 +1,396 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10namespace {
11Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
12 const auto flags{inst->Flags<IR::FpControl>()};
13 if (flags.no_contraction) {
14 ctx.Decorate(op, spv::Decoration::NoContraction);
15 }
16 return op;
17}
18
19Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
20 if (ctx.profile.has_broken_spirv_clamp) {
21 return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
22 } else {
23 return ctx.OpFClamp(type, value, zero, one);
24 }
25}
26
27Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) {
28 if (ctx.profile.ignore_nan_fp_comparisons) {
29 const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)};
30 const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))};
31 const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))};
32 return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan);
33 } else {
34 return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
35 }
36}
37
38Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) {
39 if (ctx.profile.ignore_nan_fp_comparisons) {
40 const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)};
41 const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)};
42 const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)};
43 return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan);
44 } else {
45 return (ctx.*comp_func)(ctx.U1, lhs, rhs);
46 }
47}
48} // Anonymous namespace
49
50Id EmitFPAbs16(EmitContext& ctx, Id value) {
51 return ctx.OpFAbs(ctx.F16[1], value);
52}
53
54Id EmitFPAbs32(EmitContext& ctx, Id value) {
55 return ctx.OpFAbs(ctx.F32[1], value);
56}
57
58Id EmitFPAbs64(EmitContext& ctx, Id value) {
59 return ctx.OpFAbs(ctx.F64[1], value);
60}
61
62Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
63 return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
64}
65
66Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
67 return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
68}
69
70Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
71 return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
72}
73
74Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
75 return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
76}
77
78Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
79 return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
80}
81
82Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
83 return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
84}
85
86Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
87 return ctx.OpFMax(ctx.F32[1], a, b);
88}
89
90Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
91 return ctx.OpFMax(ctx.F64[1], a, b);
92}
93
94Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
95 return ctx.OpFMin(ctx.F32[1], a, b);
96}
97
98Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
99 return ctx.OpFMin(ctx.F64[1], a, b);
100}
101
102Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
103 return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
104}
105
106Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
107 return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
108}
109
110Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
111 return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
112}
113
114Id EmitFPNeg16(EmitContext& ctx, Id value) {
115 return ctx.OpFNegate(ctx.F16[1], value);
116}
117
118Id EmitFPNeg32(EmitContext& ctx, Id value) {
119 return ctx.OpFNegate(ctx.F32[1], value);
120}
121
122Id EmitFPNeg64(EmitContext& ctx, Id value) {
123 return ctx.OpFNegate(ctx.F64[1], value);
124}
125
126Id EmitFPSin(EmitContext& ctx, Id value) {
127 return ctx.OpSin(ctx.F32[1], value);
128}
129
130Id EmitFPCos(EmitContext& ctx, Id value) {
131 return ctx.OpCos(ctx.F32[1], value);
132}
133
134Id EmitFPExp2(EmitContext& ctx, Id value) {
135 return ctx.OpExp2(ctx.F32[1], value);
136}
137
138Id EmitFPLog2(EmitContext& ctx, Id value) {
139 return ctx.OpLog2(ctx.F32[1], value);
140}
141
142Id EmitFPRecip32(EmitContext& ctx, Id value) {
143 return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value);
144}
145
146Id EmitFPRecip64(EmitContext& ctx, Id value) {
147 return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value);
148}
149
150Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) {
151 return ctx.OpInverseSqrt(ctx.F32[1], value);
152}
153
154Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) {
155 return ctx.OpInverseSqrt(ctx.F64[1], value);
156}
157
158Id EmitFPSqrt(EmitContext& ctx, Id value) {
159 return ctx.OpSqrt(ctx.F32[1], value);
160}
161
162Id EmitFPSaturate16(EmitContext& ctx, Id value) {
163 const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
164 const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
165 return Clamp(ctx, ctx.F16[1], value, zero, one);
166}
167
168Id EmitFPSaturate32(EmitContext& ctx, Id value) {
169 const Id zero{ctx.Const(f32{0.0})};
170 const Id one{ctx.Const(f32{1.0})};
171 return Clamp(ctx, ctx.F32[1], value, zero, one);
172}
173
174Id EmitFPSaturate64(EmitContext& ctx, Id value) {
175 const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
176 const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
177 return Clamp(ctx, ctx.F64[1], value, zero, one);
178}
179
180Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
181 return Clamp(ctx, ctx.F16[1], value, min_value, max_value);
182}
183
184Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
185 return Clamp(ctx, ctx.F32[1], value, min_value, max_value);
186}
187
188Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
189 return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
190}
191
192Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
193 return ctx.OpRoundEven(ctx.F16[1], value);
194}
195
196Id EmitFPRoundEven32(EmitContext& ctx, Id value) {
197 return ctx.OpRoundEven(ctx.F32[1], value);
198}
199
200Id EmitFPRoundEven64(EmitContext& ctx, Id value) {
201 return ctx.OpRoundEven(ctx.F64[1], value);
202}
203
204Id EmitFPFloor16(EmitContext& ctx, Id value) {
205 return ctx.OpFloor(ctx.F16[1], value);
206}
207
208Id EmitFPFloor32(EmitContext& ctx, Id value) {
209 return ctx.OpFloor(ctx.F32[1], value);
210}
211
212Id EmitFPFloor64(EmitContext& ctx, Id value) {
213 return ctx.OpFloor(ctx.F64[1], value);
214}
215
216Id EmitFPCeil16(EmitContext& ctx, Id value) {
217 return ctx.OpCeil(ctx.F16[1], value);
218}
219
220Id EmitFPCeil32(EmitContext& ctx, Id value) {
221 return ctx.OpCeil(ctx.F32[1], value);
222}
223
224Id EmitFPCeil64(EmitContext& ctx, Id value) {
225 return ctx.OpCeil(ctx.F64[1], value);
226}
227
228Id EmitFPTrunc16(EmitContext& ctx, Id value) {
229 return ctx.OpTrunc(ctx.F16[1], value);
230}
231
232Id EmitFPTrunc32(EmitContext& ctx, Id value) {
233 return ctx.OpTrunc(ctx.F32[1], value);
234}
235
236Id EmitFPTrunc64(EmitContext& ctx, Id value) {
237 return ctx.OpTrunc(ctx.F64[1], value);
238}
239
240Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
241 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
242}
243
244Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) {
245 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
246}
247
248Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) {
249 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
250}
251
252Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) {
253 return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
254}
255
256Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) {
257 return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
258}
259
260Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) {
261 return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
262}
263
264Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
265 return FPOrdNotEqual(ctx, lhs, rhs);
266}
267
268Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
269 return FPOrdNotEqual(ctx, lhs, rhs);
270}
271
272Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
273 return FPOrdNotEqual(ctx, lhs, rhs);
274}
275
276Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
277 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
278}
279
280Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
281 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
282}
283
284Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
285 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
286}
287
288Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
289 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
290}
291
292Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
293 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
294}
295
296Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
297 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
298}
299
300Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
301 return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
302}
303
304Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
305 return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
306}
307
308Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
309 return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
310}
311
312Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
313 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
314}
315
316Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
317 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
318}
319
320Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
321 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
322}
323
324Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
325 return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
326}
327
328Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
329 return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
330}
331
332Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
333 return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
334}
335
336Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
337 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
338}
339
340Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
341 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
342}
343
344Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
345 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
346}
347
348Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
349 return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
350}
351
352Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
353 return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
354}
355
356Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
357 return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
358}
359
360Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
361 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
362}
363
364Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
365 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
366}
367
368Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
369 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
370}
371
372Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
373 return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
374}
375
376Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
377 return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
378}
379
380Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
381 return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
382}
383
384Id EmitFPIsNan16(EmitContext& ctx, Id value) {
385 return ctx.OpIsNan(ctx.U1, value);
386}
387
388Id EmitFPIsNan32(EmitContext& ctx, Id value) {
389 return ctx.OpIsNan(ctx.U1, value);
390}
391
392Id EmitFPIsNan64(EmitContext& ctx, Id value) {
393 return ctx.OpIsNan(ctx.U1, value);
394}
395
396} // namespace Shader::Backend::SPIRV
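
Aside: FPUnordCompare above exists for drivers whose float comparisons ignore NaN (ctx.profile.ignore_nan_fp_comparisons). An unordered comparison must still return true when either operand is NaN, so the emitter ORs in explicit OpIsNan checks. A host-side sketch of the intended semantics (illustrative, standard C++ only; not emitter code):

#include <cmath>
#include <cstdio>

static bool UnordLessThan(float lhs, float rhs) {
    // Unordered comparison: true if either operand is NaN, otherwise the plain comparison.
    return std::isnan(lhs) || std::isnan(rhs) || lhs < rhs;
}

int main() {
    const float nan{std::nanf("")};
    std::printf("%d %d %d\n", UnordLessThan(1.0f, 2.0f), UnordLessThan(2.0f, 1.0f),
                UnordLessThan(nan, 1.0f));
    // Prints: 1 0 1
}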
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
new file mode 100644
index 000000000..3588f052b
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -0,0 +1,462 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <boost/container/static_vector.hpp>
6
7#include "shader_recompiler/backend/spirv/emit_spirv.h"
8#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10
11namespace Shader::Backend::SPIRV {
12namespace {
13class ImageOperands {
14public:
15 explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
16 Id lod, const IR::Value& offset) {
17 if (has_bias) {
18 const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
19 Add(spv::ImageOperandsMask::Bias, bias);
20 }
21 if (has_lod) {
22 const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
23 Add(spv::ImageOperandsMask::Lod, lod_value);
24 }
25 AddOffset(ctx, offset);
26 if (has_lod_clamp) {
27 const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
28 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
29 }
30 }
31
32 explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) {
33 if (offset2.IsEmpty()) {
34 if (offset.IsEmpty()) {
35 return;
36 }
37 Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
38 return;
39 }
40 const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
41 if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
42 LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring");
43 return;
44 }
45 const IR::Opcode opcode{values[0]->GetOpcode()};
46 if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
47 throw LogicError("Invalid PTP arguments");
48 }
49 auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
50
51 const Id offsets{ctx.ConstantComposite(
52 ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)),
53 ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)),
54 ctx.Const(read(1, 2), read(1, 3)))};
55 Add(spv::ImageOperandsMask::ConstOffsets, offsets);
56 }
57
58 explicit ImageOperands(Id offset, Id lod, Id ms) {
59 if (Sirit::ValidId(lod)) {
60 Add(spv::ImageOperandsMask::Lod, lod);
61 }
62 if (Sirit::ValidId(offset)) {
63 Add(spv::ImageOperandsMask::Offset, offset);
64 }
65 if (Sirit::ValidId(ms)) {
66 Add(spv::ImageOperandsMask::Sample, ms);
67 }
68 }
69
70 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
71 Id offset, Id lod_clamp) {
72 if (!Sirit::ValidId(derivates)) {
73 throw LogicError("Derivates must be present");
74 }
75 boost::container::static_vector<Id, 3> deriv_x_accum;
76 boost::container::static_vector<Id, 3> deriv_y_accum;
77 for (u32 i = 0; i < num_derivates; ++i) {
78 deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
79 deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
80 }
81 const Id derivates_X{ctx.OpCompositeConstruct(
82 ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
83 const Id derivates_Y{ctx.OpCompositeConstruct(
84 ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
85 Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
86 if (Sirit::ValidId(offset)) {
87 Add(spv::ImageOperandsMask::Offset, offset);
88 }
89 if (has_lod_clamp) {
90 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
91 }
92 }
93
94 std::span<const Id> Span() const noexcept {
95 return std::span{operands.data(), operands.size()};
96 }
97
98 std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept {
99 return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt;
100 }
101
102 spv::ImageOperandsMask Mask() const noexcept {
103 return mask;
104 }
105
106private:
107 void AddOffset(EmitContext& ctx, const IR::Value& offset) {
108 if (offset.IsEmpty()) {
109 return;
110 }
111 if (offset.IsImmediate()) {
112 Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32())));
113 return;
114 }
115 IR::Inst* const inst{offset.InstRecursive()};
116 if (inst->AreAllArgsImmediates()) {
117 switch (inst->GetOpcode()) {
118 case IR::Opcode::CompositeConstructU32x2:
119 Add(spv::ImageOperandsMask::ConstOffset,
120 ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
121 static_cast<s32>(inst->Arg(1).U32())));
122 return;
123 case IR::Opcode::CompositeConstructU32x3:
124 Add(spv::ImageOperandsMask::ConstOffset,
125 ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
126 static_cast<s32>(inst->Arg(1).U32()),
127 static_cast<s32>(inst->Arg(2).U32())));
128 return;
129 case IR::Opcode::CompositeConstructU32x4:
130 Add(spv::ImageOperandsMask::ConstOffset,
131 ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
132 static_cast<s32>(inst->Arg(1).U32()),
133 static_cast<s32>(inst->Arg(2).U32()),
134 static_cast<s32>(inst->Arg(3).U32())));
135 return;
136 default:
137 break;
138 }
139 }
140 Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
141 }
142
143 void Add(spv::ImageOperandsMask new_mask, Id value) {
144 mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
145 static_cast<unsigned>(new_mask));
146 operands.push_back(value);
147 }
148
149 void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) {
150 mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
151 static_cast<unsigned>(new_mask));
152 operands.push_back(value_1);
153 operands.push_back(value_2);
154 }
155
156 boost::container::static_vector<Id, 4> operands;
157 spv::ImageOperandsMask mask{};
158};
159
160Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) {
161 const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
162 if (def.count > 1) {
163 const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))};
164 return ctx.OpLoad(def.sampled_type, pointer);
165 } else {
166 return ctx.OpLoad(def.sampled_type, def.id);
167 }
168}
169
170Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
171 if (!index.IsImmediate() || index.U32() != 0) {
172 throw NotImplementedException("Indirect image indexing");
173 }
174 if (info.type == TextureType::Buffer) {
175 const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)};
176 if (def.count > 1) {
177 throw NotImplementedException("Indirect texture sample");
178 }
179 const Id sampler_id{def.id};
180 const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
181 return ctx.OpImage(ctx.image_buffer_type, id);
182 } else {
183 const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
184 if (def.count > 1) {
185 throw NotImplementedException("Indirect texture sample");
186 }
187 return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id));
188 }
189}
190
191Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
192 if (!index.IsImmediate() || index.U32() != 0) {
193 throw NotImplementedException("Indirect image indexing");
194 }
195 if (info.type == TextureType::Buffer) {
196 const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)};
197 return ctx.OpLoad(def.image_type, def.id);
198 } else {
199 const ImageDefinition def{ctx.images.at(info.descriptor_index)};
200 return ctx.OpLoad(def.image_type, def.id);
201 }
202}
203
204Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
205 const auto info{inst->Flags<IR::TextureInstInfo>()};
206 if (info.relaxed_precision != 0) {
207 ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
208 }
209 return sample;
210}
211
212template <typename MethodPtrType, typename... Args>
213Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
214 Id result_type, Args&&... args) {
215 IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
216 if (!sparse) {
217 return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
218 }
219 const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
220 const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
221 const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
222 sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
223 sparse->Invalidate();
224 Decorate(ctx, inst, sample);
225 return ctx.OpCompositeExtract(result_type, sample, 1U);
226}
227} // Anonymous namespace
228
229Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
230 throw LogicError("Unreachable instruction");
231}
232
233Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
234 throw LogicError("Unreachable instruction");
235}
236
237Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
238 throw LogicError("Unreachable instruction");
239}
240
241Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
242 throw LogicError("Unreachable instruction");
243}
244
245Id EmitBindlessImageGather(EmitContext&) {
246 throw LogicError("Unreachable instruction");
247}
248
249Id EmitBindlessImageGatherDref(EmitContext&) {
250 throw LogicError("Unreachable instruction");
251}
252
253Id EmitBindlessImageFetch(EmitContext&) {
254 throw LogicError("Unreachable instruction");
255}
256
257Id EmitBindlessImageQueryDimensions(EmitContext&) {
258 throw LogicError("Unreachable instruction");
259}
260
261Id EmitBindlessImageQueryLod(EmitContext&) {
262 throw LogicError("Unreachable instruction");
263}
264
265Id EmitBindlessImageGradient(EmitContext&) {
266 throw LogicError("Unreachable instruction");
267}
268
269Id EmitBindlessImageRead(EmitContext&) {
270 throw LogicError("Unreachable instruction");
271}
272
273Id EmitBindlessImageWrite(EmitContext&) {
274 throw LogicError("Unreachable instruction");
275}
276
277Id EmitBoundImageSampleImplicitLod(EmitContext&) {
278 throw LogicError("Unreachable instruction");
279}
280
281Id EmitBoundImageSampleExplicitLod(EmitContext&) {
282 throw LogicError("Unreachable instruction");
283}
284
285Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
286 throw LogicError("Unreachable instruction");
287}
288
289Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
290 throw LogicError("Unreachable instruction");
291}
292
293Id EmitBoundImageGather(EmitContext&) {
294 throw LogicError("Unreachable instruction");
295}
296
297Id EmitBoundImageGatherDref(EmitContext&) {
298 throw LogicError("Unreachable instruction");
299}
300
301Id EmitBoundImageFetch(EmitContext&) {
302 throw LogicError("Unreachable instruction");
303}
304
305Id EmitBoundImageQueryDimensions(EmitContext&) {
306 throw LogicError("Unreachable instruction");
307}
308
309Id EmitBoundImageQueryLod(EmitContext&) {
310 throw LogicError("Unreachable instruction");
311}
312
313Id EmitBoundImageGradient(EmitContext&) {
314 throw LogicError("Unreachable instruction");
315}
316
317Id EmitBoundImageRead(EmitContext&) {
318 throw LogicError("Unreachable instruction");
319}
320
321Id EmitBoundImageWrite(EmitContext&) {
322 throw LogicError("Unreachable instruction");
323}
324
325Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
326 Id bias_lc, const IR::Value& offset) {
327 const auto info{inst->Flags<IR::TextureInstInfo>()};
328 if (ctx.stage == Stage::Fragment) {
329 const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
330 bias_lc, offset);
331 return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
332 &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
333 Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
334 } else {
335        // SPIR-V does not allow implicit lods outside the fragment stage. Maxwell hardware
336        // behaves as if the lod were explicitly zero, so substitute an explicit lod of zero.
337        // This may change on Turing with implicit compute derivatives.
338 const Id lod{ctx.Const(0.0f)};
339 const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
340 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
341 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
342 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
343 }
344}
345
346Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
347 Id lod, const IR::Value& offset) {
348 const auto info{inst->Flags<IR::TextureInstInfo>()};
349 const ImageOperands operands(ctx, false, true, false, lod, offset);
350 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
351 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
352 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
353}
354
355Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
356 Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
357 const auto info{inst->Flags<IR::TextureInstInfo>()};
358 const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
359 offset);
360 return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
361 &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
362 Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span());
363}
364
365Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
366 Id coords, Id dref, Id lod, const IR::Value& offset) {
367 const auto info{inst->Flags<IR::TextureInstInfo>()};
368 const ImageOperands operands(ctx, false, true, false, lod, offset);
369 return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
370 &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
371 Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span());
372}
373
374Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
375 const IR::Value& offset, const IR::Value& offset2) {
376 const auto info{inst->Flags<IR::TextureInstInfo>()};
377 const ImageOperands operands(ctx, offset, offset2);
378 return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
379 ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
380 operands.MaskOptional(), operands.Span());
381}
382
383Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
384 const IR::Value& offset, const IR::Value& offset2, Id dref) {
385 const auto info{inst->Flags<IR::TextureInstInfo>()};
386 const ImageOperands operands(ctx, offset, offset2);
387 return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
388 ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
389 operands.Span());
390}
391
392Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
393 Id lod, Id ms) {
394 const auto info{inst->Flags<IR::TextureInstInfo>()};
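    // Buffer images have no mip levels; clear the lod so no Lod operand is emitted.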
395 if (info.type == TextureType::Buffer) {
396 lod = Id{};
397 }
398 const ImageOperands operands(offset, lod, ms);
399 return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
400 TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
401}
402
403Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
404 const auto info{inst->Flags<IR::TextureInstInfo>()};
405 const Id image{TextureImage(ctx, info, index)};
406 const Id zero{ctx.u32_zero_value};
407 const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
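    // The result packs the queried size in the leading components, zero padding in the middle,
    // and the mip level count in the last component.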
408 switch (info.type) {
409 case TextureType::Color1D:
410 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod),
411 zero, zero, mips());
412 case TextureType::ColorArray1D:
413 case TextureType::Color2D:
414 case TextureType::ColorCube:
415 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod),
416 zero, mips());
417 case TextureType::ColorArray2D:
418 case TextureType::Color3D:
419 case TextureType::ColorArrayCube:
420 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod),
421 mips());
422 case TextureType::Buffer:
423 return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero,
424 zero, mips());
425 }
426 throw LogicError("Unspecified image type {}", info.type.Value());
427}
428
429Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
430 const auto info{inst->Flags<IR::TextureInstInfo>()};
431 const Id zero{ctx.f32_zero_value};
432 const Id sampler{Texture(ctx, info, index)};
433 return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
434 zero, zero);
435}
436
437Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
438 Id derivates, Id offset, Id lod_clamp) {
439 const auto info{inst->Flags<IR::TextureInstInfo>()};
440 const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates,
441 offset, lod_clamp);
442 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
443 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
444 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
445}
446
447Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
448 const auto info{inst->Flags<IR::TextureInstInfo>()};
449 if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
450 LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
451 return ctx.ConstantNull(ctx.U32[4]);
452 }
453 return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4],
454 Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{});
455}
456
457void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
458 const auto info{inst->Flags<IR::TextureInstInfo>()};
459 ctx.OpImageWrite(Image(ctx, index, info), coords, color);
460}
461
462} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
new file mode 100644
index 000000000..d7f1a365a
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
@@ -0,0 +1,183 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8
9namespace Shader::Backend::SPIRV {
10namespace {
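// Resolve the image or image-buffer descriptor for an immediate descriptor index.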
11Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
12 if (!index.IsImmediate()) {
13 throw NotImplementedException("Indirect image indexing");
14 }
15 if (info.type == TextureType::Buffer) {
16 const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())};
17 return def.id;
18 } else {
19 const ImageDefinition def{ctx.images.at(index.U32())};
20 return def.id;
21 }
22}
23
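// Image atomics use Device scope with relaxed (None) memory semantics.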
24std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
25 const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
26 const Id semantics{ctx.u32_zero_value};
27 return {scope, semantics};
28}
29
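// Emit a 32-bit image atomic by taking an OpImageTexelPointer to the texel and applying the
// generic OpAtomic* instruction through the member function pointer.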
30Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
31 Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
32 const auto info{inst->Flags<IR::TextureInstInfo>()};
33 const Id image{Image(ctx, index, info)};
34 const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
35 const auto [scope, semantics]{AtomicArgs(ctx)};
36 return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
37}
38} // Anonymous namespace
39
40Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
41 Id value) {
42 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd);
43}
44
45Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
46 Id value) {
47 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin);
48}
49
50Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
51 Id value) {
52 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin);
53}
54
55Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
56 Id value) {
57 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax);
58}
59
60Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
61 Id value) {
62 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax);
63}
64
65Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
66 // TODO: This is not yet implemented
67 throw NotImplementedException("SPIR-V Instruction");
68}
69
70Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
71 // TODO: This is not yet implemented
72 throw NotImplementedException("SPIR-V Instruction");
73}
74
75Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
76 Id value) {
77 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd);
78}
79
80Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
81 Id value) {
82 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr);
83}
84
85Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
86 Id value) {
87 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor);
88}
89
90Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
91 Id value) {
92 return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange);
93}
94
95Id EmitBindlessImageAtomicIAdd32(EmitContext&) {
96 throw NotImplementedException("SPIR-V Instruction");
97}
98
99Id EmitBindlessImageAtomicSMin32(EmitContext&) {
100 throw NotImplementedException("SPIR-V Instruction");
101}
102
103Id EmitBindlessImageAtomicUMin32(EmitContext&) {
104 throw NotImplementedException("SPIR-V Instruction");
105}
106
107Id EmitBindlessImageAtomicSMax32(EmitContext&) {
108 throw NotImplementedException("SPIR-V Instruction");
109}
110
111Id EmitBindlessImageAtomicUMax32(EmitContext&) {
112 throw NotImplementedException("SPIR-V Instruction");
113}
114
115Id EmitBindlessImageAtomicInc32(EmitContext&) {
116 throw NotImplementedException("SPIR-V Instruction");
117}
118
119Id EmitBindlessImageAtomicDec32(EmitContext&) {
120 throw NotImplementedException("SPIR-V Instruction");
121}
122
123Id EmitBindlessImageAtomicAnd32(EmitContext&) {
124 throw NotImplementedException("SPIR-V Instruction");
125}
126
127Id EmitBindlessImageAtomicOr32(EmitContext&) {
128 throw NotImplementedException("SPIR-V Instruction");
129}
130
131Id EmitBindlessImageAtomicXor32(EmitContext&) {
132 throw NotImplementedException("SPIR-V Instruction");
133}
134
135Id EmitBindlessImageAtomicExchange32(EmitContext&) {
136 throw NotImplementedException("SPIR-V Instruction");
137}
138
139Id EmitBoundImageAtomicIAdd32(EmitContext&) {
140 throw NotImplementedException("SPIR-V Instruction");
141}
142
143Id EmitBoundImageAtomicSMin32(EmitContext&) {
144 throw NotImplementedException("SPIR-V Instruction");
145}
146
147Id EmitBoundImageAtomicUMin32(EmitContext&) {
148 throw NotImplementedException("SPIR-V Instruction");
149}
150
151Id EmitBoundImageAtomicSMax32(EmitContext&) {
152 throw NotImplementedException("SPIR-V Instruction");
153}
154
155Id EmitBoundImageAtomicUMax32(EmitContext&) {
156 throw NotImplementedException("SPIR-V Instruction");
157}
158
159Id EmitBoundImageAtomicInc32(EmitContext&) {
160 throw NotImplementedException("SPIR-V Instruction");
161}
162
163Id EmitBoundImageAtomicDec32(EmitContext&) {
164 throw NotImplementedException("SPIR-V Instruction");
165}
166
167Id EmitBoundImageAtomicAnd32(EmitContext&) {
168 throw NotImplementedException("SPIR-V Instruction");
169}
170
171Id EmitBoundImageAtomicOr32(EmitContext&) {
172 throw NotImplementedException("SPIR-V Instruction");
173}
174
175Id EmitBoundImageAtomicXor32(EmitContext&) {
176 throw NotImplementedException("SPIR-V Instruction");
177}
178
179Id EmitBoundImageAtomicExchange32(EmitContext&) {
180 throw NotImplementedException("SPIR-V Instruction");
181}
182
183} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
new file mode 100644
index 000000000..f99c02848
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -0,0 +1,579 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <sirit/sirit.h>
6
7#include "common/common_types.h"
8
9namespace Shader::IR {
10enum class Attribute : u64;
11enum class Patch : u64;
12class Inst;
13class Value;
14} // namespace Shader::IR
15
16namespace Shader::Backend::SPIRV {
17
18using Sirit::Id;
19
20class EmitContext;
21
22// Microinstruction emitters
23Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
24void EmitVoid(EmitContext& ctx);
25Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
26Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
27void EmitReference(EmitContext&);
28void EmitPhiMove(EmitContext&);
29void EmitJoin(EmitContext& ctx);
30void EmitDemoteToHelperInvocation(EmitContext& ctx);
31void EmitBarrier(EmitContext& ctx);
32void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
33void EmitDeviceMemoryBarrier(EmitContext& ctx);
34void EmitPrologue(EmitContext& ctx);
35void EmitEpilogue(EmitContext& ctx);
36void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
37void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
38void EmitGetRegister(EmitContext& ctx);
39void EmitSetRegister(EmitContext& ctx);
40void EmitGetPred(EmitContext& ctx);
41void EmitSetPred(EmitContext& ctx);
42void EmitSetGotoVariable(EmitContext& ctx);
43void EmitGetGotoVariable(EmitContext& ctx);
44void EmitSetIndirectBranchVariable(EmitContext& ctx);
45void EmitGetIndirectBranchVariable(EmitContext& ctx);
46Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
47Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
48Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
49Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
50Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
51Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
52Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
53Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
54void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
55Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
56void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
57Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
58void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
59void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
60void EmitSetSampleMask(EmitContext& ctx, Id value);
61void EmitSetFragDepth(EmitContext& ctx, Id value);
62void EmitGetZFlag(EmitContext& ctx);
63void EmitGetSFlag(EmitContext& ctx);
64void EmitGetCFlag(EmitContext& ctx);
65void EmitGetOFlag(EmitContext& ctx);
66void EmitSetZFlag(EmitContext& ctx);
67void EmitSetSFlag(EmitContext& ctx);
68void EmitSetCFlag(EmitContext& ctx);
69void EmitSetOFlag(EmitContext& ctx);
70Id EmitWorkgroupId(EmitContext& ctx);
71Id EmitLocalInvocationId(EmitContext& ctx);
72Id EmitInvocationId(EmitContext& ctx);
73Id EmitSampleId(EmitContext& ctx);
74Id EmitIsHelperInvocation(EmitContext& ctx);
75Id EmitYDirection(EmitContext& ctx);
76Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
77void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
78Id EmitUndefU1(EmitContext& ctx);
79Id EmitUndefU8(EmitContext& ctx);
80Id EmitUndefU16(EmitContext& ctx);
81Id EmitUndefU32(EmitContext& ctx);
82Id EmitUndefU64(EmitContext& ctx);
83void EmitLoadGlobalU8(EmitContext& ctx);
84void EmitLoadGlobalS8(EmitContext& ctx);
85void EmitLoadGlobalU16(EmitContext& ctx);
86void EmitLoadGlobalS16(EmitContext& ctx);
87Id EmitLoadGlobal32(EmitContext& ctx, Id address);
88Id EmitLoadGlobal64(EmitContext& ctx, Id address);
89Id EmitLoadGlobal128(EmitContext& ctx, Id address);
90void EmitWriteGlobalU8(EmitContext& ctx);
91void EmitWriteGlobalS8(EmitContext& ctx);
92void EmitWriteGlobalU16(EmitContext& ctx);
93void EmitWriteGlobalS16(EmitContext& ctx);
94void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
95void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
96void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
97Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
98Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
99Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
100Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
101Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
102Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
103Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
104void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
105 Id value);
106void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
107 Id value);
108void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
109 Id value);
110void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
111 Id value);
112void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
113 Id value);
114void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
115 Id value);
116void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
117 Id value);
118Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
119Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
120Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
121Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
122Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
123Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
124Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
125void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
126void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
127void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
128void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
129void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
130Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
131Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
132Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
133Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
134Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
135Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
136Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
137Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
138Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
139Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
140Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
141Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
142Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
143Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
144Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
145Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
146Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
147Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
148Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
149Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
150Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
151Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
152Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
153Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
154Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
155Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
156Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
157void EmitCompositeConstructF64x2(EmitContext& ctx);
158void EmitCompositeConstructF64x3(EmitContext& ctx);
159void EmitCompositeConstructF64x4(EmitContext& ctx);
160void EmitCompositeExtractF64x2(EmitContext& ctx);
161void EmitCompositeExtractF64x3(EmitContext& ctx);
162void EmitCompositeExtractF64x4(EmitContext& ctx);
163Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
164Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
165Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
166Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
167Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
168Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
169Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
170Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
171Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
172Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
173Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
174void EmitBitCastU16F16(EmitContext& ctx);
175Id EmitBitCastU32F32(EmitContext& ctx, Id value);
176void EmitBitCastU64F64(EmitContext& ctx);
177void EmitBitCastF16U16(EmitContext& ctx);
178Id EmitBitCastF32U32(EmitContext& ctx, Id value);
179void EmitBitCastF64U64(EmitContext& ctx);
180Id EmitPackUint2x32(EmitContext& ctx, Id value);
181Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
182Id EmitPackFloat2x16(EmitContext& ctx, Id value);
183Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
184Id EmitPackHalf2x16(EmitContext& ctx, Id value);
185Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
186Id EmitPackDouble2x32(EmitContext& ctx, Id value);
187Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
188void EmitGetZeroFromOp(EmitContext& ctx);
189void EmitGetSignFromOp(EmitContext& ctx);
190void EmitGetCarryFromOp(EmitContext& ctx);
191void EmitGetOverflowFromOp(EmitContext& ctx);
192void EmitGetSparseFromOp(EmitContext& ctx);
193void EmitGetInBoundsFromOp(EmitContext& ctx);
194Id EmitFPAbs16(EmitContext& ctx, Id value);
195Id EmitFPAbs32(EmitContext& ctx, Id value);
196Id EmitFPAbs64(EmitContext& ctx, Id value);
197Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
198Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
199Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
200Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
201Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
202Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
203Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
204Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
205Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
206Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
207Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
208Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
209Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
210Id EmitFPNeg16(EmitContext& ctx, Id value);
211Id EmitFPNeg32(EmitContext& ctx, Id value);
212Id EmitFPNeg64(EmitContext& ctx, Id value);
213Id EmitFPSin(EmitContext& ctx, Id value);
214Id EmitFPCos(EmitContext& ctx, Id value);
215Id EmitFPExp2(EmitContext& ctx, Id value);
216Id EmitFPLog2(EmitContext& ctx, Id value);
217Id EmitFPRecip32(EmitContext& ctx, Id value);
218Id EmitFPRecip64(EmitContext& ctx, Id value);
219Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
220Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
221Id EmitFPSqrt(EmitContext& ctx, Id value);
222Id EmitFPSaturate16(EmitContext& ctx, Id value);
223Id EmitFPSaturate32(EmitContext& ctx, Id value);
224Id EmitFPSaturate64(EmitContext& ctx, Id value);
225Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
226Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
227Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
228Id EmitFPRoundEven16(EmitContext& ctx, Id value);
229Id EmitFPRoundEven32(EmitContext& ctx, Id value);
230Id EmitFPRoundEven64(EmitContext& ctx, Id value);
231Id EmitFPFloor16(EmitContext& ctx, Id value);
232Id EmitFPFloor32(EmitContext& ctx, Id value);
233Id EmitFPFloor64(EmitContext& ctx, Id value);
234Id EmitFPCeil16(EmitContext& ctx, Id value);
235Id EmitFPCeil32(EmitContext& ctx, Id value);
236Id EmitFPCeil64(EmitContext& ctx, Id value);
237Id EmitFPTrunc16(EmitContext& ctx, Id value);
238Id EmitFPTrunc32(EmitContext& ctx, Id value);
239Id EmitFPTrunc64(EmitContext& ctx, Id value);
240Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
241Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
242Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
243Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
244Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
245Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
246Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
247Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
248Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
249Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
250Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
251Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
252Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
253Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
254Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
255Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
256Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
257Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
258Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
259Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
260Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
261Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
262Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
263Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
264Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
265Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
266Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
267Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
268Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
269Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
270Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
271Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
272Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
273Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
274Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
275Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
276Id EmitFPIsNan16(EmitContext& ctx, Id value);
277Id EmitFPIsNan32(EmitContext& ctx, Id value);
278Id EmitFPIsNan64(EmitContext& ctx, Id value);
279Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
280Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
281Id EmitISub32(EmitContext& ctx, Id a, Id b);
282Id EmitISub64(EmitContext& ctx, Id a, Id b);
283Id EmitIMul32(EmitContext& ctx, Id a, Id b);
284Id EmitINeg32(EmitContext& ctx, Id value);
285Id EmitINeg64(EmitContext& ctx, Id value);
286Id EmitIAbs32(EmitContext& ctx, Id value);
287Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
288Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
289Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
290Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
291Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
292Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
293Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
294Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
295Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
296Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
297Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
298Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
299Id EmitBitReverse32(EmitContext& ctx, Id value);
300Id EmitBitCount32(EmitContext& ctx, Id value);
301Id EmitBitwiseNot32(EmitContext& ctx, Id value);
302Id EmitFindSMsb32(EmitContext& ctx, Id value);
303Id EmitFindUMsb32(EmitContext& ctx, Id value);
304Id EmitSMin32(EmitContext& ctx, Id a, Id b);
305Id EmitUMin32(EmitContext& ctx, Id a, Id b);
306Id EmitSMax32(EmitContext& ctx, Id a, Id b);
307Id EmitUMax32(EmitContext& ctx, Id a, Id b);
308Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
309Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
310Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
311Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
312Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
313Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
314Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
315Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
316Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
317Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
318Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
319Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
320Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
321Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
322Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
323Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
324Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
325Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
326Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
327Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
328Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
329Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
330Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
331Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
332Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
333 Id value);
334Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
335 Id value);
336Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
337 Id value);
338Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
339 Id value);
340Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
341 Id value);
342Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
343 Id value);
344Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
345 Id value);
346Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
347 Id value);
348Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
349 Id value);
350Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
351 Id value);
352Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
353 Id value);
354Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
355 Id value);
356Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
357 Id value);
358Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
359 Id value);
360Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
361 Id value);
362Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
363 Id value);
364Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
365 Id value);
366Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
367 Id value);
368Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
369 Id value);
370Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
371 Id value);
372Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
373 Id value);
374Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
375 Id value);
376Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
377 Id value);
378Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
379 Id value);
380Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
381 Id value);
382Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
383 Id value);
384Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
385 Id value);
386Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
387Id EmitGlobalAtomicSMin32(EmitContext& ctx);
388Id EmitGlobalAtomicUMin32(EmitContext& ctx);
389Id EmitGlobalAtomicSMax32(EmitContext& ctx);
390Id EmitGlobalAtomicUMax32(EmitContext& ctx);
391Id EmitGlobalAtomicInc32(EmitContext& ctx);
392Id EmitGlobalAtomicDec32(EmitContext& ctx);
393Id EmitGlobalAtomicAnd32(EmitContext& ctx);
394Id EmitGlobalAtomicOr32(EmitContext& ctx);
395Id EmitGlobalAtomicXor32(EmitContext& ctx);
396Id EmitGlobalAtomicExchange32(EmitContext& ctx);
397Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
398Id EmitGlobalAtomicSMin64(EmitContext& ctx);
399Id EmitGlobalAtomicUMin64(EmitContext& ctx);
400Id EmitGlobalAtomicSMax64(EmitContext& ctx);
401Id EmitGlobalAtomicUMax64(EmitContext& ctx);
402Id EmitGlobalAtomicInc64(EmitContext& ctx);
403Id EmitGlobalAtomicDec64(EmitContext& ctx);
404Id EmitGlobalAtomicAnd64(EmitContext& ctx);
405Id EmitGlobalAtomicOr64(EmitContext& ctx);
406Id EmitGlobalAtomicXor64(EmitContext& ctx);
407Id EmitGlobalAtomicExchange64(EmitContext& ctx);
408Id EmitGlobalAtomicAddF32(EmitContext& ctx);
409Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
410Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
411Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
412Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
413Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
414Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
415Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
416Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
417Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
418Id EmitLogicalNot(EmitContext& ctx, Id value);
419Id EmitConvertS16F16(EmitContext& ctx, Id value);
420Id EmitConvertS16F32(EmitContext& ctx, Id value);
421Id EmitConvertS16F64(EmitContext& ctx, Id value);
422Id EmitConvertS32F16(EmitContext& ctx, Id value);
423Id EmitConvertS32F32(EmitContext& ctx, Id value);
424Id EmitConvertS32F64(EmitContext& ctx, Id value);
425Id EmitConvertS64F16(EmitContext& ctx, Id value);
426Id EmitConvertS64F32(EmitContext& ctx, Id value);
427Id EmitConvertS64F64(EmitContext& ctx, Id value);
428Id EmitConvertU16F16(EmitContext& ctx, Id value);
429Id EmitConvertU16F32(EmitContext& ctx, Id value);
430Id EmitConvertU16F64(EmitContext& ctx, Id value);
431Id EmitConvertU32F16(EmitContext& ctx, Id value);
432Id EmitConvertU32F32(EmitContext& ctx, Id value);
433Id EmitConvertU32F64(EmitContext& ctx, Id value);
434Id EmitConvertU64F16(EmitContext& ctx, Id value);
435Id EmitConvertU64F32(EmitContext& ctx, Id value);
436Id EmitConvertU64F64(EmitContext& ctx, Id value);
437Id EmitConvertU64U32(EmitContext& ctx, Id value);
438Id EmitConvertU32U64(EmitContext& ctx, Id value);
439Id EmitConvertF16F32(EmitContext& ctx, Id value);
440Id EmitConvertF32F16(EmitContext& ctx, Id value);
441Id EmitConvertF32F64(EmitContext& ctx, Id value);
442Id EmitConvertF64F32(EmitContext& ctx, Id value);
443Id EmitConvertF16S8(EmitContext& ctx, Id value);
444Id EmitConvertF16S16(EmitContext& ctx, Id value);
445Id EmitConvertF16S32(EmitContext& ctx, Id value);
446Id EmitConvertF16S64(EmitContext& ctx, Id value);
447Id EmitConvertF16U8(EmitContext& ctx, Id value);
448Id EmitConvertF16U16(EmitContext& ctx, Id value);
449Id EmitConvertF16U32(EmitContext& ctx, Id value);
450Id EmitConvertF16U64(EmitContext& ctx, Id value);
451Id EmitConvertF32S8(EmitContext& ctx, Id value);
452Id EmitConvertF32S16(EmitContext& ctx, Id value);
453Id EmitConvertF32S32(EmitContext& ctx, Id value);
454Id EmitConvertF32S64(EmitContext& ctx, Id value);
455Id EmitConvertF32U8(EmitContext& ctx, Id value);
456Id EmitConvertF32U16(EmitContext& ctx, Id value);
457Id EmitConvertF32U32(EmitContext& ctx, Id value);
458Id EmitConvertF32U64(EmitContext& ctx, Id value);
459Id EmitConvertF64S8(EmitContext& ctx, Id value);
460Id EmitConvertF64S16(EmitContext& ctx, Id value);
461Id EmitConvertF64S32(EmitContext& ctx, Id value);
462Id EmitConvertF64S64(EmitContext& ctx, Id value);
463Id EmitConvertF64U8(EmitContext& ctx, Id value);
464Id EmitConvertF64U16(EmitContext& ctx, Id value);
465Id EmitConvertF64U32(EmitContext& ctx, Id value);
466Id EmitConvertF64U64(EmitContext& ctx, Id value);
467Id EmitBindlessImageSampleImplicitLod(EmitContext&);
468Id EmitBindlessImageSampleExplicitLod(EmitContext&);
469Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
470Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
471Id EmitBindlessImageGather(EmitContext&);
472Id EmitBindlessImageGatherDref(EmitContext&);
473Id EmitBindlessImageFetch(EmitContext&);
474Id EmitBindlessImageQueryDimensions(EmitContext&);
475Id EmitBindlessImageQueryLod(EmitContext&);
476Id EmitBindlessImageGradient(EmitContext&);
477Id EmitBindlessImageRead(EmitContext&);
478Id EmitBindlessImageWrite(EmitContext&);
479Id EmitBoundImageSampleImplicitLod(EmitContext&);
480Id EmitBoundImageSampleExplicitLod(EmitContext&);
481Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
482Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
483Id EmitBoundImageGather(EmitContext&);
484Id EmitBoundImageGatherDref(EmitContext&);
485Id EmitBoundImageFetch(EmitContext&);
486Id EmitBoundImageQueryDimensions(EmitContext&);
487Id EmitBoundImageQueryLod(EmitContext&);
488Id EmitBoundImageGradient(EmitContext&);
489Id EmitBoundImageRead(EmitContext&);
490Id EmitBoundImageWrite(EmitContext&);
491Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
492 Id bias_lc, const IR::Value& offset);
493Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
494 Id lod, const IR::Value& offset);
495Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
496 Id coords, Id dref, Id bias_lc, const IR::Value& offset);
497Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
498 Id coords, Id dref, Id lod, const IR::Value& offset);
499Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
500 const IR::Value& offset, const IR::Value& offset2);
501Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
502 const IR::Value& offset, const IR::Value& offset2, Id dref);
503Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
504 Id lod, Id ms);
505Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
506Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
507Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
508 Id derivates, Id offset, Id lod_clamp);
509Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
510void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
511Id EmitBindlessImageAtomicIAdd32(EmitContext&);
512Id EmitBindlessImageAtomicSMin32(EmitContext&);
513Id EmitBindlessImageAtomicUMin32(EmitContext&);
514Id EmitBindlessImageAtomicSMax32(EmitContext&);
515Id EmitBindlessImageAtomicUMax32(EmitContext&);
516Id EmitBindlessImageAtomicInc32(EmitContext&);
517Id EmitBindlessImageAtomicDec32(EmitContext&);
518Id EmitBindlessImageAtomicAnd32(EmitContext&);
519Id EmitBindlessImageAtomicOr32(EmitContext&);
520Id EmitBindlessImageAtomicXor32(EmitContext&);
521Id EmitBindlessImageAtomicExchange32(EmitContext&);
522Id EmitBoundImageAtomicIAdd32(EmitContext&);
523Id EmitBoundImageAtomicSMin32(EmitContext&);
524Id EmitBoundImageAtomicUMin32(EmitContext&);
525Id EmitBoundImageAtomicSMax32(EmitContext&);
526Id EmitBoundImageAtomicUMax32(EmitContext&);
527Id EmitBoundImageAtomicInc32(EmitContext&);
528Id EmitBoundImageAtomicDec32(EmitContext&);
529Id EmitBoundImageAtomicAnd32(EmitContext&);
530Id EmitBoundImageAtomicOr32(EmitContext&);
531Id EmitBoundImageAtomicXor32(EmitContext&);
532Id EmitBoundImageAtomicExchange32(EmitContext&);
533Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
534 Id value);
535Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
536 Id value);
537Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
538 Id value);
539Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
540 Id value);
541Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
542 Id value);
543Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
544 Id value);
545Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
546 Id value);
547Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
548 Id value);
549Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
550 Id value);
551Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
552 Id value);
553Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
554 Id value);
555Id EmitLaneId(EmitContext& ctx);
556Id EmitVoteAll(EmitContext& ctx, Id pred);
557Id EmitVoteAny(EmitContext& ctx, Id pred);
558Id EmitVoteEqual(EmitContext& ctx, Id pred);
559Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
560Id EmitSubgroupEqMask(EmitContext& ctx);
561Id EmitSubgroupLtMask(EmitContext& ctx);
562Id EmitSubgroupLeMask(EmitContext& ctx);
563Id EmitSubgroupGtMask(EmitContext& ctx);
564Id EmitSubgroupGeMask(EmitContext& ctx);
565Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
566 Id segmentation_mask);
567Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
568 Id segmentation_mask);
569Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
570 Id segmentation_mask);
571Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
572 Id segmentation_mask);
573Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
574Id EmitDPdxFine(EmitContext& ctx, Id op_a);
575Id EmitDPdyFine(EmitContext& ctx, Id op_a);
576Id EmitDPdxCoarse(EmitContext& ctx, Id op_a);
577Id EmitDPdyCoarse(EmitContext& ctx, Id op_a);
578
579} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
new file mode 100644
index 000000000..3501d7495
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -0,0 +1,270 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
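// Forward the zero and sign condition-code pseudo-ops to comparisons of the integer result against zero.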
10void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
11 IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
12 if (!zero) {
13 return;
14 }
15 zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value));
16 zero->Invalidate();
17}
18
19void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
20 IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
21 if (!sign) {
22 return;
23 }
24 sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value));
25 sign->Invalidate();
26}
27} // Anonymous namespace
28
29Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
30 Id result{};
31 if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
32 const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])};
33 const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)};
34 result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U);
35
36 const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)};
37 carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value));
38 carry->Invalidate();
39 } else {
40 result = ctx.OpIAdd(ctx.U32[1], a, b);
41 }
42 SetZeroFlag(ctx, inst, result);
43 SetSignFlag(ctx, inst, result);
44    if (IR::Inst* overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
45 // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
46 constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
47 const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)};
48 const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)};
49
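        // Select the overflow test from the sign of a: for non-negative a the add overflows when
        // b > INT_MAX - a; for negative a, the comparison runs against the wrapped difference.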
50 const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)};
51 const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)};
52        const Id overflow_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)};
53        overflow->SetDefinition(overflow_flag);
54 overflow->Invalidate();
55 }
56 return result;
57}
58
59Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
60 return ctx.OpIAdd(ctx.U64, a, b);
61}
62
63Id EmitISub32(EmitContext& ctx, Id a, Id b) {
64 return ctx.OpISub(ctx.U32[1], a, b);
65}
66
67Id EmitISub64(EmitContext& ctx, Id a, Id b) {
68 return ctx.OpISub(ctx.U64, a, b);
69}
70
71Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
72 return ctx.OpIMul(ctx.U32[1], a, b);
73}
74
75Id EmitINeg32(EmitContext& ctx, Id value) {
76 return ctx.OpSNegate(ctx.U32[1], value);
77}
78
79Id EmitINeg64(EmitContext& ctx, Id value) {
80 return ctx.OpSNegate(ctx.U64, value);
81}
82
83Id EmitIAbs32(EmitContext& ctx, Id value) {
84 return ctx.OpSAbs(ctx.U32[1], value);
85}
86
87Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
88 return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
89}
90
91Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) {
92 return ctx.OpShiftLeftLogical(ctx.U64, base, shift);
93}
94
95Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) {
96 return ctx.OpShiftRightLogical(ctx.U32[1], base, shift);
97}
98
99Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) {
100 return ctx.OpShiftRightLogical(ctx.U64, base, shift);
101}
102
103Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) {
104 return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift);
105}
106
107Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) {
108 return ctx.OpShiftRightArithmetic(ctx.U64, base, shift);
109}
110
111Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
112 const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)};
113 SetZeroFlag(ctx, inst, result);
114 SetSignFlag(ctx, inst, result);
115 return result;
116}
117
118Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
119 const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
120 SetZeroFlag(ctx, inst, result);
121 SetSignFlag(ctx, inst, result);
122 return result;
123}
124
125Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
126 const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
127 SetZeroFlag(ctx, inst, result);
128 SetSignFlag(ctx, inst, result);
129 return result;
130}
131
132Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) {
133 return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count);
134}
135
136Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
137 const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)};
138 SetZeroFlag(ctx, inst, result);
139 SetSignFlag(ctx, inst, result);
140 return result;
141}
142
143Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
144 const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)};
145 SetZeroFlag(ctx, inst, result);
146 SetSignFlag(ctx, inst, result);
147 return result;
148}
149
150Id EmitBitReverse32(EmitContext& ctx, Id value) {
151 return ctx.OpBitReverse(ctx.U32[1], value);
152}
153
154Id EmitBitCount32(EmitContext& ctx, Id value) {
155 return ctx.OpBitCount(ctx.U32[1], value);
156}
157
158Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
159 return ctx.OpNot(ctx.U32[1], value);
160}
161
162Id EmitFindSMsb32(EmitContext& ctx, Id value) {
163 return ctx.OpFindSMsb(ctx.U32[1], value);
164}
165
166Id EmitFindUMsb32(EmitContext& ctx, Id value) {
167 return ctx.OpFindUMsb(ctx.U32[1], value);
168}
169
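// When the profile reports broken signed operations, route signed min/max through explicit S32
// bitcasts and convert the result back to U32.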
170Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
171 const bool is_broken{ctx.profile.has_broken_signed_operations};
172 if (is_broken) {
173 a = ctx.OpBitcast(ctx.S32[1], a);
174 b = ctx.OpBitcast(ctx.S32[1], b);
175 }
176 const Id result{ctx.OpSMin(ctx.U32[1], a, b)};
177 return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
178}
179
180Id EmitUMin32(EmitContext& ctx, Id a, Id b) {
181 return ctx.OpUMin(ctx.U32[1], a, b);
182}
183
184Id EmitSMax32(EmitContext& ctx, Id a, Id b) {
185 const bool is_broken{ctx.profile.has_broken_signed_operations};
186 if (is_broken) {
187 a = ctx.OpBitcast(ctx.S32[1], a);
188 b = ctx.OpBitcast(ctx.S32[1], b);
189 }
190 const Id result{ctx.OpSMax(ctx.U32[1], a, b)};
191 return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
192}
193
194Id EmitUMax32(EmitContext& ctx, Id a, Id b) {
195 return ctx.OpUMax(ctx.U32[1], a, b);
196}
197
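// Signed clamp with two workarounds: broken signed operations force S32 bitcasts, and a broken
// OpSClamp is emulated with OpSMax(OpSMin(value, max), min).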
198Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
199 Id result{};
200 if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) {
201 value = ctx.OpBitcast(ctx.S32[1], value);
202 min = ctx.OpBitcast(ctx.S32[1], min);
203 max = ctx.OpBitcast(ctx.S32[1], max);
204 if (ctx.profile.has_broken_spirv_clamp) {
205 result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min);
206 } else {
207 result = ctx.OpSClamp(ctx.S32[1], value, min, max);
208 }
209 result = ctx.OpBitcast(ctx.U32[1], result);
210 } else {
211 result = ctx.OpSClamp(ctx.U32[1], value, min, max);
212 }
213 SetZeroFlag(ctx, inst, result);
214 SetSignFlag(ctx, inst, result);
215 return result;
216}
217
218Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
219 Id result{};
220 if (ctx.profile.has_broken_spirv_clamp) {
221 result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min);
222 } else {
223 result = ctx.OpUClamp(ctx.U32[1], value, min, max);
224 }
225 SetZeroFlag(ctx, inst, result);
226 SetSignFlag(ctx, inst, result);
227 return result;
228}
229
230Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
231 return ctx.OpSLessThan(ctx.U1, lhs, rhs);
232}
233
234Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
235 return ctx.OpULessThan(ctx.U1, lhs, rhs);
236}
237
238Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) {
239 return ctx.OpIEqual(ctx.U1, lhs, rhs);
240}
241
242Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
243 return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs);
244}
245
246Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
247 return ctx.OpULessThanEqual(ctx.U1, lhs, rhs);
248}
249
250Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
251 return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);
252}
253
254Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
255 return ctx.OpUGreaterThan(ctx.U1, lhs, rhs);
256}
257
258Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) {
259 return ctx.OpINotEqual(ctx.U1, lhs, rhs);
260}
261
262Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
263 return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs);
264}
265
266Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
267 return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
268}
269
270} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
new file mode 100644
index 000000000..b9a9500fc
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) {
11 return ctx.OpLogicalOr(ctx.U1, a, b);
12}
13
14Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) {
15 return ctx.OpLogicalAnd(ctx.U1, a, b);
16}
17
18Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) {
19 return ctx.OpLogicalNotEqual(ctx.U1, a, b);
20}
21
22Id EmitLogicalNot(EmitContext& ctx, Id value) {
23 return ctx.OpLogicalNot(ctx.U1, value);
24}
25
26} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
new file mode 100644
index 000000000..679ee2684
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -0,0 +1,275 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bit>
6
7#include "shader_recompiler/backend/spirv/emit_spirv.h"
8#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
9
10namespace Shader::Backend::SPIRV {
11namespace {
12Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size,
13 u32 index_offset = 0) {
14 if (offset.IsImmediate()) {
15 const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset};
16 return ctx.Const(imm_offset);
17 }
18 const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
19 Id index{ctx.Def(offset)};
20 if (shift != 0) {
21 const Id shift_id{ctx.Const(shift)};
22 index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
23 }
24 if (index_offset != 0) {
25 index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
26 }
27 return index;
28}
29
30Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
31 const StorageTypeDefinition& type_def, size_t element_size,
32 Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
33 if (!binding.IsImmediate()) {
34 throw NotImplementedException("Dynamic storage buffer indexing");
35 }
36 const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
37 const Id index{StorageIndex(ctx, offset, element_size, index_offset)};
38 return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
39}
40
41Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type,
42 const StorageTypeDefinition& type_def, size_t element_size,
43 Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
44 const Id pointer{
45 StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
46 return ctx.OpLoad(result_type, pointer);
47}
48
49Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
50 u32 index_offset = 0) {
51 return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32),
52 &StorageDefinitions::U32, index_offset);
53}
54
55void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
56 const StorageTypeDefinition& type_def, size_t element_size,
57 Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
58 const Id pointer{
59 StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
60 ctx.OpStore(pointer, value);
61}
62
63void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
64 u32 index_offset = 0) {
65 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
66 &StorageDefinitions::U32, index_offset);
67}
68} // Anonymous namespace
69
70void EmitLoadGlobalU8(EmitContext&) {
71 throw NotImplementedException("SPIR-V Instruction");
72}
73
74void EmitLoadGlobalS8(EmitContext&) {
75 throw NotImplementedException("SPIR-V Instruction");
76}
77
78void EmitLoadGlobalU16(EmitContext&) {
79 throw NotImplementedException("SPIR-V Instruction");
80}
81
82void EmitLoadGlobalS16(EmitContext&) {
83 throw NotImplementedException("SPIR-V Instruction");
84}
85
86Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
87 if (ctx.profile.support_int64) {
88 return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
89 }
90 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
91 return ctx.Const(0u);
92}
93
94Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
95 if (ctx.profile.support_int64) {
96 return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
97 }
98 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
99 return ctx.Const(0u, 0u);
100}
101
102Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
103 if (ctx.profile.support_int64) {
104 return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
105 }
106 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
107 return ctx.Const(0u, 0u, 0u, 0u);
108}
109
110void EmitWriteGlobalU8(EmitContext&) {
111 throw NotImplementedException("SPIR-V Instruction");
112}
113
114void EmitWriteGlobalS8(EmitContext&) {
115 throw NotImplementedException("SPIR-V Instruction");
116}
117
118void EmitWriteGlobalU16(EmitContext&) {
119 throw NotImplementedException("SPIR-V Instruction");
120}
121
122void EmitWriteGlobalS16(EmitContext&) {
123 throw NotImplementedException("SPIR-V Instruction");
124}
125
126void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
127 if (ctx.profile.support_int64) {
128 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
129 return;
130 }
131 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
132}
133
134void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
135 if (ctx.profile.support_int64) {
136 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
137 return;
138 }
139 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
140}
141
142void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
143 if (ctx.profile.support_int64) {
144 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
145 return;
146 }
147 LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
148}
149
150Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
151 if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
152 return ctx.OpUConvert(ctx.U32[1],
153 LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8,
154 sizeof(u8), &StorageDefinitions::U8));
155 } else {
156 return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
157 ctx.BitOffset8(offset), ctx.Const(8u));
158 }
159}
160
161Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
162 if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
163 return ctx.OpSConvert(ctx.U32[1],
164 LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8,
165 sizeof(s8), &StorageDefinitions::S8));
166 } else {
167 return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
168 ctx.BitOffset8(offset), ctx.Const(8u));
169 }
170}
171
172Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
173 if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
174 return ctx.OpUConvert(ctx.U32[1],
175 LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16,
176 sizeof(u16), &StorageDefinitions::U16));
177 } else {
178 return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
179 ctx.BitOffset16(offset), ctx.Const(16u));
180 }
181}
182
183Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
184 if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
185 return ctx.OpSConvert(ctx.U32[1],
186 LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16,
187 sizeof(s16), &StorageDefinitions::S16));
188 } else {
189 return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
190 ctx.BitOffset16(offset), ctx.Const(16u));
191 }
192}
193
194Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
195 return LoadStorage32(ctx, binding, offset);
196}
197
198Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
199 if (ctx.profile.support_descriptor_aliasing) {
200 return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2,
201 sizeof(u32[2]), &StorageDefinitions::U32x2);
202 } else {
203 return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0),
204 LoadStorage32(ctx, binding, offset, 1));
205 }
206}
207
208Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
209 if (ctx.profile.support_descriptor_aliasing) {
210 return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4,
211 sizeof(u32[4]), &StorageDefinitions::U32x4);
212 } else {
213 return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0),
214 LoadStorage32(ctx, binding, offset, 1),
215 LoadStorage32(ctx, binding, offset, 2),
216 LoadStorage32(ctx, binding, offset, 3));
217 }
218}
219
220void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
221 Id value) {
222 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
223 sizeof(u8), &StorageDefinitions::U8);
224}
225
226void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
227 Id value) {
228 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
229 sizeof(s8), &StorageDefinitions::S8);
230}
231
232void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
233 Id value) {
234 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
235 sizeof(u16), &StorageDefinitions::U16);
236}
237
238void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
239 Id value) {
240 WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
241 sizeof(s16), &StorageDefinitions::S16);
242}
243
244void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
245 Id value) {
246 WriteStorage32(ctx, binding, offset, value);
247}
248
249void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
250 Id value) {
251 if (ctx.profile.support_descriptor_aliasing) {
252 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]),
253 &StorageDefinitions::U32x2);
254 } else {
255 for (u32 index = 0; index < 2; ++index) {
256 const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
257 WriteStorage32(ctx, binding, offset, element, index);
258 }
259 }
260}
261
262void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
263 Id value) {
264 if (ctx.profile.support_descriptor_aliasing) {
265 WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]),
266 &StorageDefinitions::U32x4);
267 } else {
268 for (u32 index = 0; index < 4; ++index) {
269 const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
270 WriteStorage32(ctx, binding, offset, element, index);
271 }
272 }
273}
274
275} // namespace Shader::Backend::SPIRV
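When 8-bit or 16-bit storage access is unavailable, EmitLoadStorageU8/S8 and the 16-bit variants above fall back to loading the containing 32-bit word and recovering the field with a bit-field extract. A standalone C++ sketch of that arithmetic, assuming BitOffset8 resolves to (byte_offset % 4) * 8:

// Sketch of the fallback path in EmitLoadStorageU8/S8: load the containing
// 32-bit word, then extract the byte at the assumed bit offset.
#include <cstdint>
#include <cstdio>

namespace {
uint32_t ExtractU8(uint32_t word, uint32_t byte_offset) {
    const uint32_t bit{(byte_offset % 4) * 8};
    return (word >> bit) & 0xffu; // BitFieldUExtract(word, bit, 8)
}

int32_t ExtractS8(uint32_t word, uint32_t byte_offset) {
    const uint32_t bit{(byte_offset % 4) * 8};
    // BitFieldSExtract(word, bit, 8): place the byte in the top bits, then use
    // an arithmetic right shift to sign-extend it.
    const int32_t shifted{static_cast<int32_t>(word << (24 - bit))};
    return shifted >> 24;
}
} // Anonymous namespace

int main() {
    const uint32_t word{0x8040ff01u}; // bytes 0x01, 0xff, 0x40, 0x80 at offsets 0..3
    std::printf("u8 at +1 = %u\n", ExtractU8(word, 1)); // 255
    std::printf("s8 at +1 = %d\n", ExtractS8(word, 1)); // -1
    std::printf("s8 at +3 = %d\n", ExtractS8(word, 3)); // -128
    return 0;
}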
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
new file mode 100644
index 000000000..c5b4f4720
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
11 return ctx.OpSelect(ctx.U1, cond, true_value, false_value);
12}
13
14Id EmitSelectU8(EmitContext&, Id, Id, Id) {
15 throw NotImplementedException("SPIR-V Instruction");
16}
17
18Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
19 return ctx.OpSelect(ctx.U16, cond, true_value, false_value);
20}
21
22Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
23 return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
24}
25
26Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
27 return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
28}
29
30Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
31 return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value);
32}
33
34Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
35 return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
36}
37
38Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
39 return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value);
40}
41
42} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..9a79fc7a2
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,174 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
11 const Id shift_id{ctx.Const(shift)};
12 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
13 return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
14}
15
16Id Word(EmitContext& ctx, Id offset) {
17 const Id shift_id{ctx.Const(2U)};
18 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
19 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
20 return ctx.OpLoad(ctx.U32[1], pointer);
21}
22
23std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
24 const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))};
25 const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))};
26 const Id count_id{ctx.Const(count)};
27 return {bit, count_id};
28}
29} // Anonymous namespace
30
31Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
32 if (ctx.profile.support_explicit_workgroup_layout) {
33 const Id pointer{
34 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
35 return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
36 } else {
37 const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
38 return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
39 }
40}
41
42Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
43 if (ctx.profile.support_explicit_workgroup_layout) {
44 const Id pointer{
45 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
46 return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
47 } else {
48 const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
49 return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
50 }
51}
52
53Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
54 if (ctx.profile.support_explicit_workgroup_layout) {
55 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
56 return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
57 } else {
58 const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
59 return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
60 }
61}
62
63Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
64 if (ctx.profile.support_explicit_workgroup_layout) {
65 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
66 return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
67 } else {
68 const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
69 return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
70 }
71}
72
73Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
74 if (ctx.profile.support_explicit_workgroup_layout) {
75 const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
76 return ctx.OpLoad(ctx.U32[1], pointer);
77 } else {
78 return Word(ctx, offset);
79 }
80}
81
82Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
83 if (ctx.profile.support_explicit_workgroup_layout) {
84 const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
85 return ctx.OpLoad(ctx.U32[2], pointer);
86 } else {
87 const Id shift_id{ctx.Const(2U)};
88 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
89 const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
90 const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
91 const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
92 return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
93 ctx.OpLoad(ctx.U32[1], rhs_pointer));
94 }
95}
96
97Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
98 if (ctx.profile.support_explicit_workgroup_layout) {
99 const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
100 return ctx.OpLoad(ctx.U32[4], pointer);
101 }
102 const Id shift_id{ctx.Const(2U)};
103 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
104 std::array<Id, 4> values{};
105 for (u32 i = 0; i < 4; ++i) {
106 const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
107 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
108 values[i] = ctx.OpLoad(ctx.U32[1], pointer);
109 }
110 return ctx.OpCompositeConstruct(ctx.U32[4], values);
111}
112
113void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
114 if (ctx.profile.support_explicit_workgroup_layout) {
115 const Id pointer{
116 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
117 ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
118 } else {
119 ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
120 }
121}
122
123void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
124 if (ctx.profile.support_explicit_workgroup_layout) {
125 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
126 ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
127 } else {
128 ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
129 }
130}
131
132void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
133 Id pointer{};
134 if (ctx.profile.support_explicit_workgroup_layout) {
135 pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
136 } else {
137 const Id shift{ctx.Const(2U)};
138 const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
139 pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
140 }
141 ctx.OpStore(pointer, value);
142}
143
144void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
145 if (ctx.profile.support_explicit_workgroup_layout) {
146 const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
147 ctx.OpStore(pointer, value);
148 return;
149 }
150 const Id shift{ctx.Const(2U)};
151 const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
152 const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
153 const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
154 const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
155 ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
156 ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
157}
158
159void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
160 if (ctx.profile.support_explicit_workgroup_layout) {
161 const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
162 ctx.OpStore(pointer, value);
163 return;
164 }
165 const Id shift{ctx.Const(2U)};
166 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
167 for (u32 i = 0; i < 4; ++i) {
168 const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
169 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
170 ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
171 }
172}
173
174} // namespace Shader::Backend::SPIRV
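Without support_explicit_workgroup_layout, shared memory above is addressed as an array of 32-bit words: Word() derives the word index as offset >> 2, and ExtractArgs derives the bit position as (offset * 8) masked to 24 for bytes or 16 for halfwords. A standalone sketch of that addressing, with illustrative offsets:

#include <cstdint>
#include <cstdio>

namespace {
struct SubwordAccess {
    uint32_t word_index; // index into the shared u32 array, as in Word()
    uint32_t bit;        // first bit of the field inside that word
    uint32_t count;      // field width in bits
};

SubwordAccess Describe(uint32_t byte_offset, uint32_t mask, uint32_t count) {
    // ExtractArgs: shift the byte offset left by 3, then mask it down to the
    // legal bit positions (24 for bytes, 16 for halfwords).
    return {byte_offset >> 2, (byte_offset << 3) & mask, count};
}
} // Anonymous namespace

int main() {
    const SubwordAccess byte_at_6{Describe(6, 24, 8)};  // EmitLoadSharedU8 fallback
    const SubwordAccess half_at_6{Describe(6, 16, 16)}; // EmitLoadSharedU16 fallback
    std::printf("byte: word %u, bits [%u, %u)\n", byte_at_6.word_index, byte_at_6.bit,
                byte_at_6.bit + byte_at_6.count); // word 1, bits [16, 24)
    std::printf("half: word %u, bits [%u, %u)\n", half_at_6.word_index, half_at_6.bit,
                half_at_6.bit + half_at_6.count); // word 1, bits [16, 32)
    return 0;
}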
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
new file mode 100644
index 000000000..9e7eb3cb1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -0,0 +1,150 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10void ConvertDepthMode(EmitContext& ctx) {
11 const Id type{ctx.F32[1]};
12 const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
13 const Id z{ctx.OpCompositeExtract(type, position, 2u)};
14 const Id w{ctx.OpCompositeExtract(type, position, 3u)};
15 const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
16 const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
17 ctx.OpStore(ctx.output_position, vector);
18}
19
20void SetFixedPipelinePointSize(EmitContext& ctx) {
21 if (ctx.runtime_info.fixed_state_point_size) {
22 const float point_size{*ctx.runtime_info.fixed_state_point_size};
23 ctx.OpStore(ctx.output_point_size, ctx.Const(point_size));
24 }
25}
26
27Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one,
28 Id default_vector) {
29 switch (num_components) {
30 case 1:
31 return element == 3 ? one : zero;
32 case 2:
33 return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero);
34 case 3:
35 return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero);
36 case 4:
37 return default_vector;
38 }
39 throw InvalidArgument("Bad element");
40}
41
42Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) {
43 switch (comparison) {
44 case CompareFunction::Never:
45 return ctx.false_value;
46 case CompareFunction::Less:
47 return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2);
48 case CompareFunction::Equal:
49 return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2);
50 case CompareFunction::LessThanEqual:
51 return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2);
52 case CompareFunction::Greater:
53 return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2);
54 case CompareFunction::NotEqual:
55 return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2);
56 case CompareFunction::GreaterThanEqual:
57 return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2);
58 case CompareFunction::Always:
59 return ctx.true_value;
60 }
61 throw InvalidArgument("Comparison function {}", comparison);
62}
63
64void AlphaTest(EmitContext& ctx) {
65 if (!ctx.runtime_info.alpha_test_func) {
66 return;
67 }
68 const auto comparison{*ctx.runtime_info.alpha_test_func};
69 if (comparison == CompareFunction::Always) {
70 return;
71 }
72 if (!Sirit::ValidId(ctx.frag_color[0])) {
73 return;
74 }
75
76 const Id type{ctx.F32[1]};
77 const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])};
78 const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)};
79
80 const Id true_label{ctx.OpLabel()};
81 const Id discard_label{ctx.OpLabel()};
82 const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)};
83 const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)};
84
85 ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone);
86 ctx.OpBranchConditional(condition, true_label, discard_label);
87 ctx.AddLabel(discard_label);
88 ctx.OpKill();
89 ctx.AddLabel(true_label);
90}
91} // Anonymous namespace
92
93void EmitPrologue(EmitContext& ctx) {
94 if (ctx.stage == Stage::VertexB) {
95 const Id zero{ctx.Const(0.0f)};
96 const Id one{ctx.Const(1.0f)};
97 const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)};
98 ctx.OpStore(ctx.output_position, default_vector);
99 for (const auto& info : ctx.output_generics) {
100 if (info[0].num_components == 0) {
101 continue;
102 }
103 u32 element{0};
104 while (element < 4) {
105 const auto& element_info{info[element]};
106 const u32 num{element_info.num_components};
107 const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)};
108 ctx.OpStore(element_info.id, value);
109 element += num;
110 }
111 }
112 }
113 if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
114 SetFixedPipelinePointSize(ctx);
115 }
116}
117
118void EmitEpilogue(EmitContext& ctx) {
119 if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) {
120 ConvertDepthMode(ctx);
121 }
122 if (ctx.stage == Stage::Fragment) {
123 AlphaTest(ctx);
124 }
125}
126
127void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
128 if (ctx.runtime_info.convert_depth_mode) {
129 ConvertDepthMode(ctx);
130 }
131 if (stream.IsImmediate()) {
132 ctx.OpEmitStreamVertex(ctx.Def(stream));
133 } else {
134 LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
135 ctx.OpEmitStreamVertex(ctx.u32_zero_value);
136 }
137 // Restore fixed pipeline point size after emitting the vertex
138 SetFixedPipelinePointSize(ctx);
139}
140
141void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
142 if (stream.IsImmediate()) {
143 ctx.OpEndStreamPrimitive(ctx.Def(stream));
144 } else {
145 LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
146 ctx.OpEndStreamPrimitive(ctx.u32_zero_value);
147 }
148}
149
150} // namespace Shader::Backend::SPIRV
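ConvertDepthMode above rewrites clip-space z as (z + w) * 0.5, which maps an OpenGL-style NDC depth range of [-1, 1] onto [0, 1] after the perspective divide. A small standalone check of that remap:

#include <cstdio>

namespace {
float RemappedNdcDepth(float z, float w) {
    const float screen_depth{(z + w) * 0.5f}; // value stored back into Position.Z
    return screen_depth / w;                  // depth after the perspective divide
}
} // Anonymous namespace

int main() {
    const float w{2.0f};
    std::printf("near plane: %f\n", RemappedNdcDepth(-w, w));   // -1 in GL NDC -> 0
    std::printf("midpoint:   %f\n", RemappedNdcDepth(0.0f, w)); //  0 in GL NDC -> 0.5
    std::printf("far plane:  %f\n", RemappedNdcDepth(w, w));    // +1 in GL NDC -> 1
    return 0;
}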
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
new file mode 100644
index 000000000..c9f469e90
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9
10Id EmitUndefU1(EmitContext& ctx) {
11 return ctx.OpUndef(ctx.U1);
12}
13
14Id EmitUndefU8(EmitContext&) {
15 throw NotImplementedException("SPIR-V Instruction");
16}
17
18Id EmitUndefU16(EmitContext&) {
19 throw NotImplementedException("SPIR-V Instruction");
20}
21
22Id EmitUndefU32(EmitContext& ctx) {
23 return ctx.OpUndef(ctx.U32[1]);
24}
25
26Id EmitUndefU64(EmitContext&) {
27 throw NotImplementedException("SPIR-V Instruction");
28}
29
30} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
new file mode 100644
index 000000000..78b1e1ba7
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -0,0 +1,203 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
7
8namespace Shader::Backend::SPIRV {
9namespace {
10Id WarpExtract(EmitContext& ctx, Id value) {
11 const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
12 return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
13}
14
15Id LoadMask(EmitContext& ctx, Id mask) {
16 const Id value{ctx.OpLoad(ctx.U32[4], mask)};
17 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
18 return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
19 }
20 return WarpExtract(ctx, value);
21}
22
23void SetInBoundsFlag(IR::Inst* inst, Id result) {
24 IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
25 if (!in_bounds) {
26 return;
27 }
28 in_bounds->SetDefinition(result);
29 in_bounds->Invalidate();
30}
31
32Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) {
33 return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask);
34}
35
36Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) {
37 return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id,
38 ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask));
39}
40
41Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) {
42 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
43 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
44 return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask);
45}
46
47Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
48 return ctx.OpSelect(ctx.U32[1], in_range,
49 ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
50}
51} // Anonymous namespace
52
53Id EmitLaneId(EmitContext& ctx) {
54 const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
55 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
56 return id;
57 }
58 return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U));
59}
60
61Id EmitVoteAll(EmitContext& ctx, Id pred) {
62 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
63 return ctx.OpSubgroupAllKHR(ctx.U1, pred);
64 }
65 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
66 const Id active_mask{WarpExtract(ctx, mask_ballot)};
67 const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
68 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
69 return ctx.OpIEqual(ctx.U1, lhs, active_mask);
70}
71
72Id EmitVoteAny(EmitContext& ctx, Id pred) {
73 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
74 return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
75 }
76 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
77 const Id active_mask{WarpExtract(ctx, mask_ballot)};
78 const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
79 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
80 return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
81}
82
83Id EmitVoteEqual(EmitContext& ctx, Id pred) {
84 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
85 return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
86 }
87 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
88 const Id active_mask{WarpExtract(ctx, mask_ballot)};
89 const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
90 const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
91 return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
92 ctx.OpIEqual(ctx.U1, lhs, active_mask));
93}
94
95Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
96 const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
97 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
98 return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
99 }
100 return WarpExtract(ctx, ballot);
101}
102
103Id EmitSubgroupEqMask(EmitContext& ctx) {
104 return LoadMask(ctx, ctx.subgroup_mask_eq);
105}
106
107Id EmitSubgroupLtMask(EmitContext& ctx) {
108 return LoadMask(ctx, ctx.subgroup_mask_lt);
109}
110
111Id EmitSubgroupLeMask(EmitContext& ctx) {
112 return LoadMask(ctx, ctx.subgroup_mask_le);
113}
114
115Id EmitSubgroupGtMask(EmitContext& ctx) {
116 return LoadMask(ctx, ctx.subgroup_mask_gt);
117}
118
119Id EmitSubgroupGeMask(EmitContext& ctx) {
120 return LoadMask(ctx, ctx.subgroup_mask_ge);
121}
122
123Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
124 Id segmentation_mask) {
125 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
126 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
127 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
128 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
129
130 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
131 const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
132 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
133
134 SetInBoundsFlag(inst, in_range);
135 return SelectValue(ctx, in_range, value, src_thread_id);
136}
137
138Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
139 Id segmentation_mask) {
140 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
141 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
142 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
143 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
144
145 SetInBoundsFlag(inst, in_range);
146 return SelectValue(ctx, in_range, value, src_thread_id);
147}
148
149Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
150 Id segmentation_mask) {
151 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
152 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
153 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
154 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
155
156 SetInBoundsFlag(inst, in_range);
157 return SelectValue(ctx, in_range, value, src_thread_id);
158}
159
160Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
161 Id segmentation_mask) {
162 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
163 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
164 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
165 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
166
167 SetInBoundsFlag(inst, in_range);
168 return SelectValue(ctx, in_range, value, src_thread_id);
169}
170
171Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
172 const Id three{ctx.Const(3U)};
173 Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
174 mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
175 mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U));
176 mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask);
177 mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
178
179 const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)};
180 const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)};
181
182 const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)};
183 const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)};
184 return ctx.OpFAdd(ctx.F32[1], result_a, result_b);
185}
186
187Id EmitDPdxFine(EmitContext& ctx, Id op_a) {
188 return ctx.OpDPdxFine(ctx.F32[1], op_a);
189}
190
191Id EmitDPdyFine(EmitContext& ctx, Id op_a) {
192 return ctx.OpDPdyFine(ctx.F32[1], op_a);
193}
194
195Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) {
196 return ctx.OpDPdxCoarse(ctx.F32[1], op_a);
197}
198
199Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) {
200 return ctx.OpDPdyCoarse(ctx.F32[1], op_a);
201}
202
203} // namespace Shader::Backend::SPIRV
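EmitShuffleIndex above builds the source lane from a segmentation mask and a clamp: the masked bits of the thread id form the segment base, the clamp bounds the highest reachable lane, and an out-of-range shuffle keeps the thread's own value. A standalone C++ sketch of that selection logic; the mask and clamp values below are illustrative, not taken from the diff:

#include <cstdint>
#include <cstdio>

namespace {
uint32_t ShuffleIndexSourceLane(uint32_t thread_id, uint32_t index, uint32_t clamp,
                                uint32_t segmentation_mask) {
    const uint32_t not_seg_mask{~segmentation_mask};
    const uint32_t min_thread_id{thread_id & segmentation_mask};          // segment base
    const uint32_t max_thread_id{min_thread_id | (clamp & not_seg_mask)}; // highest reachable lane
    const uint32_t src_thread_id{(index & not_seg_mask) | min_thread_id};
    const bool in_range{src_thread_id <= max_thread_id};
    return in_range ? src_thread_id : thread_id; // out of range: keep the own value
}
} // Anonymous namespace

int main() {
    const uint32_t seg_mask{0x18}; // 8-lane segments within a 32-lane warp (illustrative)
    // Lane 13 sits in the segment [8, 15]; index 2 resolves to lane 8 + 2 = 10.
    std::printf("lane 13, index 2, clamp 7 -> %u\n", ShuffleIndexSourceLane(13, 2, 7, seg_mask));
    // With clamp 3 only lanes [8, 11] are reachable; index 6 is out of range,
    // so the thread reads its own lane 13.
    std::printf("lane 13, index 6, clamp 3 -> %u\n", ShuffleIndexSourceLane(13, 6, 3, seg_mask));
    return 0;
}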
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
new file mode 100644
index 000000000..8369d0d84
--- /dev/null
+++ b/src/shader_recompiler/environment.h
@@ -0,0 +1,53 @@
1#pragma once
2
3#include <array>
4
5#include "common/common_types.h"
6#include "shader_recompiler/program_header.h"
7#include "shader_recompiler/shader_info.h"
8#include "shader_recompiler/stage.h"
9
10namespace Shader {
11
12class Environment {
13public:
14 virtual ~Environment() = default;
15
16 [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
17
18 [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0;
19
20 [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
21
22 [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
23
24 [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
25
26 [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
27
28 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
29
30 [[nodiscard]] const ProgramHeader& SPH() const noexcept {
31 return sph;
32 }
33
34 [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept {
35 return gp_passthrough_mask;
36 }
37
38 [[nodiscard]] Stage ShaderStage() const noexcept {
39 return stage;
40 }
41
42 [[nodiscard]] u32 StartAddress() const noexcept {
43 return start_address;
44 }
45
46protected:
47 ProgramHeader sph{};
48 std::array<u32, 8> gp_passthrough_mask{};
49 Stage stage{};
50 u32 start_address{};
51};
52
53} // namespace Shader
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
new file mode 100644
index 000000000..337e7f0c8
--- /dev/null
+++ b/src/shader_recompiler/exception.h
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <stdexcept>
8#include <string>
9#include <string_view>
10#include <utility>
11
12#include <fmt/format.h>
13
14namespace Shader {
15
16class Exception : public std::exception {
17public:
18 explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
19
20 const char* what() const noexcept override {
21 return err_message.c_str();
22 }
23
24 void Prepend(std::string_view prepend) {
25 err_message.insert(0, prepend);
26 }
27
28 void Append(std::string_view append) {
29 err_message += append;
30 }
31
32private:
33 std::string err_message;
34};
35
36class LogicError : public Exception {
37public:
38 template <typename... Args>
39 LogicError(const char* message, Args&&... args)
40 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
41};
42
43class RuntimeError : public Exception {
44public:
45 template <typename... Args>
46 RuntimeError(const char* message, Args&&... args)
47 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
48};
49
50class NotImplementedException : public Exception {
51public:
52 template <typename... Args>
53 NotImplementedException(const char* message, Args&&... args)
54 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
55 Append(" is not implemented");
56 }
57};
58
59class InvalidArgument : public Exception {
60public:
61 template <typename... Args>
62 InvalidArgument(const char* message, Args&&... args)
63 : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
64};
65
66} // namespace Shader
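A short usage sketch for the exception types declared above: the message is fmt-formatted at the throw site, and NotImplementedException appends " is not implemented" to the formatted text:

#include <cstdio>

#include "shader_recompiler/exception.h"

int main() {
    try {
        throw Shader::NotImplementedException("Opcode {}", 0x42);
    } catch (const Shader::Exception& e) {
        // Prints: Opcode 66 is not implemented
        std::printf("%s\n", e.what());
    }
    return 0;
}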
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
new file mode 100644
index 000000000..b61773487
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "shader_recompiler/frontend/ir/value.h"
10
11namespace Shader::IR {
12
13class Block;
14
15struct AbstractSyntaxNode {
16 enum class Type {
17 Block,
18 If,
19 EndIf,
20 Loop,
21 Repeat,
22 Break,
23 Return,
24 Unreachable,
25 };
26 union Data {
27 Block* block;
28 struct {
29 U1 cond;
30 Block* body;
31 Block* merge;
32 } if_node;
33 struct {
34 Block* merge;
35 } end_if;
36 struct {
37 Block* body;
38 Block* continue_block;
39 Block* merge;
40 } loop;
41 struct {
42 U1 cond;
43 Block* loop_header;
44 Block* merge;
45 } repeat;
46 struct {
47 U1 cond;
48 Block* merge;
49 Block* skip;
50 } break_node;
51 };
52
53 Data data{};
54 Type type{};
55};
56using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
57
58} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
new file mode 100644
index 000000000..4d0b8b8e5
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -0,0 +1,454 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <fmt/format.h>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/attribute.h"
9
10namespace Shader::IR {
11
12bool IsGeneric(Attribute attribute) noexcept {
13 return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
14}
15
16u32 GenericAttributeIndex(Attribute attribute) {
17 if (!IsGeneric(attribute)) {
18 throw InvalidArgument("Attribute is not generic {}", attribute);
19 }
20 return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
21}
22
23u32 GenericAttributeElement(Attribute attribute) {
24 if (!IsGeneric(attribute)) {
25 throw InvalidArgument("Attribute is not generic {}", attribute);
26 }
27 return static_cast<u32>(attribute) % 4;
28}
29
30std::string NameOf(Attribute attribute) {
31 switch (attribute) {
32 case Attribute::PrimitiveId:
33 return "PrimitiveId";
34 case Attribute::Layer:
35 return "Layer";
36 case Attribute::ViewportIndex:
37 return "ViewportIndex";
38 case Attribute::PointSize:
39 return "PointSize";
40 case Attribute::PositionX:
41 return "Position.X";
42 case Attribute::PositionY:
43 return "Position.Y";
44 case Attribute::PositionZ:
45 return "Position.Z";
46 case Attribute::PositionW:
47 return "Position.W";
48 case Attribute::Generic0X:
49 return "Generic[0].X";
50 case Attribute::Generic0Y:
51 return "Generic[0].Y";
52 case Attribute::Generic0Z:
53 return "Generic[0].Z";
54 case Attribute::Generic0W:
55 return "Generic[0].W";
56 case Attribute::Generic1X:
57 return "Generic[1].X";
58 case Attribute::Generic1Y:
59 return "Generic[1].Y";
60 case Attribute::Generic1Z:
61 return "Generic[1].Z";
62 case Attribute::Generic1W:
63 return "Generic[1].W";
64 case Attribute::Generic2X:
65 return "Generic[2].X";
66 case Attribute::Generic2Y:
67 return "Generic[2].Y";
68 case Attribute::Generic2Z:
69 return "Generic[2].Z";
70 case Attribute::Generic2W:
71 return "Generic[2].W";
72 case Attribute::Generic3X:
73 return "Generic[3].X";
74 case Attribute::Generic3Y:
75 return "Generic[3].Y";
76 case Attribute::Generic3Z:
77 return "Generic[3].Z";
78 case Attribute::Generic3W:
79 return "Generic[3].W";
80 case Attribute::Generic4X:
81 return "Generic[4].X";
82 case Attribute::Generic4Y:
83 return "Generic[4].Y";
84 case Attribute::Generic4Z:
85 return "Generic[4].Z";
86 case Attribute::Generic4W:
87 return "Generic[4].W";
88 case Attribute::Generic5X:
89 return "Generic[5].X";
90 case Attribute::Generic5Y:
91 return "Generic[5].Y";
92 case Attribute::Generic5Z:
93 return "Generic[5].Z";
94 case Attribute::Generic5W:
95 return "Generic[5].W";
96 case Attribute::Generic6X:
97 return "Generic[6].X";
98 case Attribute::Generic6Y:
99 return "Generic[6].Y";
100 case Attribute::Generic6Z:
101 return "Generic[6].Z";
102 case Attribute::Generic6W:
103 return "Generic[6].W";
104 case Attribute::Generic7X:
105 return "Generic[7].X";
106 case Attribute::Generic7Y:
107 return "Generic[7].Y";
108 case Attribute::Generic7Z:
109 return "Generic[7].Z";
110 case Attribute::Generic7W:
111 return "Generic[7].W";
112 case Attribute::Generic8X:
113 return "Generic[8].X";
114 case Attribute::Generic8Y:
115 return "Generic[8].Y";
116 case Attribute::Generic8Z:
117 return "Generic[8].Z";
118 case Attribute::Generic8W:
119 return "Generic[8].W";
120 case Attribute::Generic9X:
121 return "Generic[9].X";
122 case Attribute::Generic9Y:
123 return "Generic[9].Y";
124 case Attribute::Generic9Z:
125 return "Generic[9].Z";
126 case Attribute::Generic9W:
127 return "Generic[9].W";
128 case Attribute::Generic10X:
129 return "Generic[10].X";
130 case Attribute::Generic10Y:
131 return "Generic[10].Y";
132 case Attribute::Generic10Z:
133 return "Generic[10].Z";
134 case Attribute::Generic10W:
135 return "Generic[10].W";
136 case Attribute::Generic11X:
137 return "Generic[11].X";
138 case Attribute::Generic11Y:
139 return "Generic[11].Y";
140 case Attribute::Generic11Z:
141 return "Generic[11].Z";
142 case Attribute::Generic11W:
143 return "Generic[11].W";
144 case Attribute::Generic12X:
145 return "Generic[12].X";
146 case Attribute::Generic12Y:
147 return "Generic[12].Y";
148 case Attribute::Generic12Z:
149 return "Generic[12].Z";
150 case Attribute::Generic12W:
151 return "Generic[12].W";
152 case Attribute::Generic13X:
153 return "Generic[13].X";
154 case Attribute::Generic13Y:
155 return "Generic[13].Y";
156 case Attribute::Generic13Z:
157 return "Generic[13].Z";
158 case Attribute::Generic13W:
159 return "Generic[13].W";
160 case Attribute::Generic14X:
161 return "Generic[14].X";
162 case Attribute::Generic14Y:
163 return "Generic[14].Y";
164 case Attribute::Generic14Z:
165 return "Generic[14].Z";
166 case Attribute::Generic14W:
167 return "Generic[14].W";
168 case Attribute::Generic15X:
169 return "Generic[15].X";
170 case Attribute::Generic15Y:
171 return "Generic[15].Y";
172 case Attribute::Generic15Z:
173 return "Generic[15].Z";
174 case Attribute::Generic15W:
175 return "Generic[15].W";
176 case Attribute::Generic16X:
177 return "Generic[16].X";
178 case Attribute::Generic16Y:
179 return "Generic[16].Y";
180 case Attribute::Generic16Z:
181 return "Generic[16].Z";
182 case Attribute::Generic16W:
183 return "Generic[16].W";
184 case Attribute::Generic17X:
185 return "Generic[17].X";
186 case Attribute::Generic17Y:
187 return "Generic[17].Y";
188 case Attribute::Generic17Z:
189 return "Generic[17].Z";
190 case Attribute::Generic17W:
191 return "Generic[17].W";
192 case Attribute::Generic18X:
193 return "Generic[18].X";
194 case Attribute::Generic18Y:
195 return "Generic[18].Y";
196 case Attribute::Generic18Z:
197 return "Generic[18].Z";
198 case Attribute::Generic18W:
199 return "Generic[18].W";
200 case Attribute::Generic19X:
201 return "Generic[19].X";
202 case Attribute::Generic19Y:
203 return "Generic[19].Y";
204 case Attribute::Generic19Z:
205 return "Generic[19].Z";
206 case Attribute::Generic19W:
207 return "Generic[19].W";
208 case Attribute::Generic20X:
209 return "Generic[20].X";
210 case Attribute::Generic20Y:
211 return "Generic[20].Y";
212 case Attribute::Generic20Z:
213 return "Generic[20].Z";
214 case Attribute::Generic20W:
215 return "Generic[20].W";
216 case Attribute::Generic21X:
217 return "Generic[21].X";
218 case Attribute::Generic21Y:
219 return "Generic[21].Y";
220 case Attribute::Generic21Z:
221 return "Generic[21].Z";
222 case Attribute::Generic21W:
223 return "Generic[21].W";
224 case Attribute::Generic22X:
225 return "Generic[22].X";
226 case Attribute::Generic22Y:
227 return "Generic[22].Y";
228 case Attribute::Generic22Z:
229 return "Generic[22].Z";
230 case Attribute::Generic22W:
231 return "Generic[22].W";
232 case Attribute::Generic23X:
233 return "Generic[23].X";
234 case Attribute::Generic23Y:
235 return "Generic[23].Y";
236 case Attribute::Generic23Z:
237 return "Generic[23].Z";
238 case Attribute::Generic23W:
239 return "Generic[23].W";
240 case Attribute::Generic24X:
241 return "Generic[24].X";
242 case Attribute::Generic24Y:
243 return "Generic[24].Y";
244 case Attribute::Generic24Z:
245 return "Generic[24].Z";
246 case Attribute::Generic24W:
247 return "Generic[24].W";
248 case Attribute::Generic25X:
249 return "Generic[25].X";
250 case Attribute::Generic25Y:
251 return "Generic[25].Y";
252 case Attribute::Generic25Z:
253 return "Generic[25].Z";
254 case Attribute::Generic25W:
255 return "Generic[25].W";
256 case Attribute::Generic26X:
257 return "Generic[26].X";
258 case Attribute::Generic26Y:
259 return "Generic[26].Y";
260 case Attribute::Generic26Z:
261 return "Generic[26].Z";
262 case Attribute::Generic26W:
263 return "Generic[26].W";
264 case Attribute::Generic27X:
265 return "Generic[27].X";
266 case Attribute::Generic27Y:
267 return "Generic[27].Y";
268 case Attribute::Generic27Z:
269 return "Generic[27].Z";
270 case Attribute::Generic27W:
271 return "Generic[27].W";
272 case Attribute::Generic28X:
273 return "Generic[28].X";
274 case Attribute::Generic28Y:
275 return "Generic[28].Y";
276 case Attribute::Generic28Z:
277 return "Generic[28].Z";
278 case Attribute::Generic28W:
279 return "Generic[28].W";
280 case Attribute::Generic29X:
281 return "Generic[29].X";
282 case Attribute::Generic29Y:
283 return "Generic[29].Y";
284 case Attribute::Generic29Z:
285 return "Generic[29].Z";
286 case Attribute::Generic29W:
287 return "Generic[29].W";
288 case Attribute::Generic30X:
289 return "Generic[30].X";
290 case Attribute::Generic30Y:
291 return "Generic[30].Y";
292 case Attribute::Generic30Z:
293 return "Generic[30].Z";
294 case Attribute::Generic30W:
295 return "Generic[30].W";
296 case Attribute::Generic31X:
297 return "Generic[31].X";
298 case Attribute::Generic31Y:
299 return "Generic[31].Y";
300 case Attribute::Generic31Z:
301 return "Generic[31].Z";
302 case Attribute::Generic31W:
303 return "Generic[31].W";
304 case Attribute::ColorFrontDiffuseR:
305 return "ColorFrontDiffuse.R";
306 case Attribute::ColorFrontDiffuseG:
307 return "ColorFrontDiffuse.G";
308 case Attribute::ColorFrontDiffuseB:
309 return "ColorFrontDiffuse.B";
310 case Attribute::ColorFrontDiffuseA:
311 return "ColorFrontDiffuse.A";
312 case Attribute::ColorFrontSpecularR:
313 return "ColorFrontSpecular.R";
314 case Attribute::ColorFrontSpecularG:
315 return "ColorFrontSpecular.G";
316 case Attribute::ColorFrontSpecularB:
317 return "ColorFrontSpecular.B";
318 case Attribute::ColorFrontSpecularA:
319 return "ColorFrontSpecular.A";
320 case Attribute::ColorBackDiffuseR:
321 return "ColorBackDiffuse.R";
322 case Attribute::ColorBackDiffuseG:
323 return "ColorBackDiffuse.G";
324 case Attribute::ColorBackDiffuseB:
325 return "ColorBackDiffuse.B";
326 case Attribute::ColorBackDiffuseA:
327 return "ColorBackDiffuse.A";
328 case Attribute::ColorBackSpecularR:
329 return "ColorBackSpecular.R";
330 case Attribute::ColorBackSpecularG:
331 return "ColorBackSpecular.G";
332 case Attribute::ColorBackSpecularB:
333 return "ColorBackSpecular.B";
334 case Attribute::ColorBackSpecularA:
335 return "ColorBackSpecular.A";
336 case Attribute::ClipDistance0:
337 return "ClipDistance[0]";
338 case Attribute::ClipDistance1:
339 return "ClipDistance[1]";
340 case Attribute::ClipDistance2:
341 return "ClipDistance[2]";
342 case Attribute::ClipDistance3:
343 return "ClipDistance[3]";
344 case Attribute::ClipDistance4:
345 return "ClipDistance[4]";
346 case Attribute::ClipDistance5:
347 return "ClipDistance[5]";
348 case Attribute::ClipDistance6:
349 return "ClipDistance[6]";
350 case Attribute::ClipDistance7:
351 return "ClipDistance[7]";
352 case Attribute::PointSpriteS:
353 return "PointSprite.S";
354 case Attribute::PointSpriteT:
355 return "PointSprite.T";
356 case Attribute::FogCoordinate:
357 return "FogCoordinate";
358 case Attribute::TessellationEvaluationPointU:
359 return "TessellationEvaluationPoint.U";
360 case Attribute::TessellationEvaluationPointV:
361 return "TessellationEvaluationPoint.V";
362 case Attribute::InstanceId:
363 return "InstanceId";
364 case Attribute::VertexId:
365 return "VertexId";
366 case Attribute::FixedFncTexture0S:
367 return "FixedFncTexture[0].S";
368 case Attribute::FixedFncTexture0T:
369 return "FixedFncTexture[0].T";
370 case Attribute::FixedFncTexture0R:
371 return "FixedFncTexture[0].R";
372 case Attribute::FixedFncTexture0Q:
373 return "FixedFncTexture[0].Q";
374 case Attribute::FixedFncTexture1S:
375 return "FixedFncTexture[1].S";
376 case Attribute::FixedFncTexture1T:
377 return "FixedFncTexture[1].T";
378 case Attribute::FixedFncTexture1R:
379 return "FixedFncTexture[1].R";
380 case Attribute::FixedFncTexture1Q:
381 return "FixedFncTexture[1].Q";
382 case Attribute::FixedFncTexture2S:
383 return "FixedFncTexture[2].S";
384 case Attribute::FixedFncTexture2T:
385 return "FixedFncTexture[2].T";
386 case Attribute::FixedFncTexture2R:
387 return "FixedFncTexture[2].R";
388 case Attribute::FixedFncTexture2Q:
389 return "FixedFncTexture[2].Q";
390 case Attribute::FixedFncTexture3S:
391 return "FixedFncTexture[3].S";
392 case Attribute::FixedFncTexture3T:
393 return "FixedFncTexture[3].T";
394 case Attribute::FixedFncTexture3R:
395 return "FixedFncTexture[3].R";
396 case Attribute::FixedFncTexture3Q:
397 return "FixedFncTexture[3].Q";
398 case Attribute::FixedFncTexture4S:
399 return "FixedFncTexture[4].S";
400 case Attribute::FixedFncTexture4T:
401 return "FixedFncTexture[4].T";
402 case Attribute::FixedFncTexture4R:
403 return "FixedFncTexture[4].R";
404 case Attribute::FixedFncTexture4Q:
405 return "FixedFncTexture[4].Q";
406 case Attribute::FixedFncTexture5S:
407 return "FixedFncTexture[5].S";
408 case Attribute::FixedFncTexture5T:
409 return "FixedFncTexture[5].T";
410 case Attribute::FixedFncTexture5R:
411 return "FixedFncTexture[5].R";
412 case Attribute::FixedFncTexture5Q:
413 return "FixedFncTexture[5].Q";
414 case Attribute::FixedFncTexture6S:
415 return "FixedFncTexture[6].S";
416 case Attribute::FixedFncTexture6T:
417 return "FixedFncTexture[6].T";
418 case Attribute::FixedFncTexture6R:
419 return "FixedFncTexture[6].R";
420 case Attribute::FixedFncTexture6Q:
421 return "FixedFncTexture[6].Q";
422 case Attribute::FixedFncTexture7S:
423 return "FixedFncTexture[7].S";
424 case Attribute::FixedFncTexture7T:
425 return "FixedFncTexture[7].T";
426 case Attribute::FixedFncTexture7R:
427 return "FixedFncTexture[7].R";
428 case Attribute::FixedFncTexture7Q:
429 return "FixedFncTexture[7].Q";
430 case Attribute::FixedFncTexture8S:
431 return "FixedFncTexture[8].S";
432 case Attribute::FixedFncTexture8T:
433 return "FixedFncTexture[8].T";
434 case Attribute::FixedFncTexture8R:
435 return "FixedFncTexture[8].R";
436 case Attribute::FixedFncTexture8Q:
437 return "FixedFncTexture[8].Q";
438 case Attribute::FixedFncTexture9S:
439 return "FixedFncTexture[9].S";
440 case Attribute::FixedFncTexture9T:
441 return "FixedFncTexture[9].T";
442 case Attribute::FixedFncTexture9R:
443 return "FixedFncTexture[9].R";
444 case Attribute::FixedFncTexture9Q:
445 return "FixedFncTexture[9].Q";
446 case Attribute::ViewportMask:
447 return "ViewportMask";
448 case Attribute::FrontFace:
449 return "FrontFace";
450 }
451 return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
452}
453
454} // namespace Shader::IR
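GenericAttributeIndex and GenericAttributeElement above decompose a generic attribute into its vector index and component: generics start at enum value 32 (Generic0X) and pack four components per index, so the index is (value - 32) / 4 and the element is value % 4. A standalone example:

#include <cstdint>
#include <cstdio>

namespace {
constexpr uint32_t kGeneric0X = 32; // Attribute::Generic0X

constexpr uint32_t GenericIndex(uint32_t attribute) {
    return (attribute - kGeneric0X) / 4;
}

constexpr uint32_t GenericElement(uint32_t attribute) {
    return attribute % 4; // valid because Generic0X itself is a multiple of 4
}
} // Anonymous namespace

int main() {
    const uint32_t generic5z{54}; // Attribute::Generic5Z
    std::printf("Generic[%u].%c\n", GenericIndex(generic5z), "XYZW"[GenericElement(generic5z)]);
    return 0;
}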
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
new file mode 100644
index 000000000..ca1199494
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -0,0 +1,250 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9#include "common/common_types.h"
10
11namespace Shader::IR {
12
13enum class Attribute : u64 {
14 PrimitiveId = 24,
15 Layer = 25,
16 ViewportIndex = 26,
17 PointSize = 27,
18 PositionX = 28,
19 PositionY = 29,
20 PositionZ = 30,
21 PositionW = 31,
22 Generic0X = 32,
23 Generic0Y = 33,
24 Generic0Z = 34,
25 Generic0W = 35,
26 Generic1X = 36,
27 Generic1Y = 37,
28 Generic1Z = 38,
29 Generic1W = 39,
30 Generic2X = 40,
31 Generic2Y = 41,
32 Generic2Z = 42,
33 Generic2W = 43,
34 Generic3X = 44,
35 Generic3Y = 45,
36 Generic3Z = 46,
37 Generic3W = 47,
38 Generic4X = 48,
39 Generic4Y = 49,
40 Generic4Z = 50,
41 Generic4W = 51,
42 Generic5X = 52,
43 Generic5Y = 53,
44 Generic5Z = 54,
45 Generic5W = 55,
46 Generic6X = 56,
47 Generic6Y = 57,
48 Generic6Z = 58,
49 Generic6W = 59,
50 Generic7X = 60,
51 Generic7Y = 61,
52 Generic7Z = 62,
53 Generic7W = 63,
54 Generic8X = 64,
55 Generic8Y = 65,
56 Generic8Z = 66,
57 Generic8W = 67,
58 Generic9X = 68,
59 Generic9Y = 69,
60 Generic9Z = 70,
61 Generic9W = 71,
62 Generic10X = 72,
63 Generic10Y = 73,
64 Generic10Z = 74,
65 Generic10W = 75,
66 Generic11X = 76,
67 Generic11Y = 77,
68 Generic11Z = 78,
69 Generic11W = 79,
70 Generic12X = 80,
71 Generic12Y = 81,
72 Generic12Z = 82,
73 Generic12W = 83,
74 Generic13X = 84,
75 Generic13Y = 85,
76 Generic13Z = 86,
77 Generic13W = 87,
78 Generic14X = 88,
79 Generic14Y = 89,
80 Generic14Z = 90,
81 Generic14W = 91,
82 Generic15X = 92,
83 Generic15Y = 93,
84 Generic15Z = 94,
85 Generic15W = 95,
86 Generic16X = 96,
87 Generic16Y = 97,
88 Generic16Z = 98,
89 Generic16W = 99,
90 Generic17X = 100,
91 Generic17Y = 101,
92 Generic17Z = 102,
93 Generic17W = 103,
94 Generic18X = 104,
95 Generic18Y = 105,
96 Generic18Z = 106,
97 Generic18W = 107,
98 Generic19X = 108,
99 Generic19Y = 109,
100 Generic19Z = 110,
101 Generic19W = 111,
102 Generic20X = 112,
103 Generic20Y = 113,
104 Generic20Z = 114,
105 Generic20W = 115,
106 Generic21X = 116,
107 Generic21Y = 117,
108 Generic21Z = 118,
109 Generic21W = 119,
110 Generic22X = 120,
111 Generic22Y = 121,
112 Generic22Z = 122,
113 Generic22W = 123,
114 Generic23X = 124,
115 Generic23Y = 125,
116 Generic23Z = 126,
117 Generic23W = 127,
118 Generic24X = 128,
119 Generic24Y = 129,
120 Generic24Z = 130,
121 Generic24W = 131,
122 Generic25X = 132,
123 Generic25Y = 133,
124 Generic25Z = 134,
125 Generic25W = 135,
126 Generic26X = 136,
127 Generic26Y = 137,
128 Generic26Z = 138,
129 Generic26W = 139,
130 Generic27X = 140,
131 Generic27Y = 141,
132 Generic27Z = 142,
133 Generic27W = 143,
134 Generic28X = 144,
135 Generic28Y = 145,
136 Generic28Z = 146,
137 Generic28W = 147,
138 Generic29X = 148,
139 Generic29Y = 149,
140 Generic29Z = 150,
141 Generic29W = 151,
142 Generic30X = 152,
143 Generic30Y = 153,
144 Generic30Z = 154,
145 Generic30W = 155,
146 Generic31X = 156,
147 Generic31Y = 157,
148 Generic31Z = 158,
149 Generic31W = 159,
150 ColorFrontDiffuseR = 160,
151 ColorFrontDiffuseG = 161,
152 ColorFrontDiffuseB = 162,
153 ColorFrontDiffuseA = 163,
154 ColorFrontSpecularR = 164,
155 ColorFrontSpecularG = 165,
156 ColorFrontSpecularB = 166,
157 ColorFrontSpecularA = 167,
158 ColorBackDiffuseR = 168,
159 ColorBackDiffuseG = 169,
160 ColorBackDiffuseB = 170,
161 ColorBackDiffuseA = 171,
162 ColorBackSpecularR = 172,
163 ColorBackSpecularG = 173,
164 ColorBackSpecularB = 174,
165 ColorBackSpecularA = 175,
166 ClipDistance0 = 176,
167 ClipDistance1 = 177,
168 ClipDistance2 = 178,
169 ClipDistance3 = 179,
170 ClipDistance4 = 180,
171 ClipDistance5 = 181,
172 ClipDistance6 = 182,
173 ClipDistance7 = 183,
174 PointSpriteS = 184,
175 PointSpriteT = 185,
176 FogCoordinate = 186,
177 TessellationEvaluationPointU = 188,
178 TessellationEvaluationPointV = 189,
179 InstanceId = 190,
180 VertexId = 191,
181 FixedFncTexture0S = 192,
182 FixedFncTexture0T = 193,
183 FixedFncTexture0R = 194,
184 FixedFncTexture0Q = 195,
185 FixedFncTexture1S = 196,
186 FixedFncTexture1T = 197,
187 FixedFncTexture1R = 198,
188 FixedFncTexture1Q = 199,
189 FixedFncTexture2S = 200,
190 FixedFncTexture2T = 201,
191 FixedFncTexture2R = 202,
192 FixedFncTexture2Q = 203,
193 FixedFncTexture3S = 204,
194 FixedFncTexture3T = 205,
195 FixedFncTexture3R = 206,
196 FixedFncTexture3Q = 207,
197 FixedFncTexture4S = 208,
198 FixedFncTexture4T = 209,
199 FixedFncTexture4R = 210,
200 FixedFncTexture4Q = 211,
201 FixedFncTexture5S = 212,
202 FixedFncTexture5T = 213,
203 FixedFncTexture5R = 214,
204 FixedFncTexture5Q = 215,
205 FixedFncTexture6S = 216,
206 FixedFncTexture6T = 217,
207 FixedFncTexture6R = 218,
208 FixedFncTexture6Q = 219,
209 FixedFncTexture7S = 220,
210 FixedFncTexture7T = 221,
211 FixedFncTexture7R = 222,
212 FixedFncTexture7Q = 223,
213 FixedFncTexture8S = 224,
214 FixedFncTexture8T = 225,
215 FixedFncTexture8R = 226,
216 FixedFncTexture8Q = 227,
217 FixedFncTexture9S = 228,
218 FixedFncTexture9T = 229,
219 FixedFncTexture9R = 230,
220 FixedFncTexture9Q = 231,
221 ViewportMask = 232,
222 FrontFace = 255,
223};
224
225constexpr size_t NUM_GENERICS = 32;
226
227[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
228
229[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
230
231[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);
232
233[[nodiscard]] std::string NameOf(Attribute attribute);
234
235[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept {
236 return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value);
237}
238
239} // namespace Shader::IR
240
241template <>
242struct fmt::formatter<Shader::IR::Attribute> {
243 constexpr auto parse(format_parse_context& ctx) {
244 return ctx.begin();
245 }
246 template <typename FormatContext>
247 auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
248 return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
249 }
250};
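The header above is self-contained: operator+ steps between the packed components of an attribute, the GenericAttribute* helpers decompose a Generic* attribute, and the fmt::formatter specialization routes "{}" through NameOf. A minimal usage sketch (illustrative only, not part of this patch; it assumes the GenericAttribute* helpers behave as their names and the enum layout imply):

// Illustrative sketch, not part of this patch.
#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/attribute.h"

void PrintGeneric0Components() {
    using namespace Shader::IR;
    const Attribute base{Attribute::Generic0X};
    for (size_t element = 0; element < 4; ++element) {
        const Attribute attr{base + element}; // operator+ steps X -> Y -> Z -> W
        // The fmt::formatter specialization forwards "{}" to NameOf(attr)
        fmt::print("{}: generic {} element {}\n", attr, GenericAttributeIndex(attr),
                   GenericAttributeElement(attr));
    }
}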
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
new file mode 100644
index 000000000..7c08b25ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <initializer_list>
7#include <map>
8#include <memory>
9
10#include "common/bit_cast.h"
11#include "common/common_types.h"
12#include "shader_recompiler/frontend/ir/basic_block.h"
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
18
19Block::~Block() = default;
20
21void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
22 PrependNewInst(end(), op, args);
23}
24
25Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
26 std::initializer_list<Value> args, u32 flags) {
27 Inst* const inst{inst_pool->Create(op, flags)};
28 const auto result_it{instructions.insert(insertion_point, *inst)};
29
30 if (inst->NumArgs() != args.size()) {
31 throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
32 }
33 std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
34 inst->SetArg(index, arg);
35 ++index;
36 });
37 return result_it;
38}
39
40void Block::AddBranch(Block* block) {
41 if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
42 throw LogicError("Successor already inserted");
43 }
44 if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
45 throw LogicError("Predecessor already inserted");
46 }
47 imm_successors.push_back(block);
48 block->imm_predecessors.push_back(this);
49}
50
51static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
52 Block* block) {
53 if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
54 return fmt::format("{{Block ${}}}", it->second);
55 }
56 return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
57}
58
59static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
60 const Inst* inst) {
61 const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
62 if (is_inserted) {
63 ++inst_index;
64 }
65 return it->second;
66}
67
68static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
69 const Value& arg) {
70 if (arg.IsEmpty()) {
71 return "<null>";
72 }
73 if (!arg.IsImmediate() || arg.IsIdentity()) {
74 return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
75 }
76 switch (arg.Type()) {
77 case Type::U1:
78 return fmt::format("#{}", arg.U1() ? "true" : "false");
79 case Type::U8:
80 return fmt::format("#{}", arg.U8());
81 case Type::U16:
82 return fmt::format("#{}", arg.U16());
83 case Type::U32:
84 return fmt::format("#{}", arg.U32());
85 case Type::U64:
86 return fmt::format("#{}", arg.U64());
87 case Type::F32:
88 return fmt::format("#{}", arg.F32());
89 case Type::Reg:
90 return fmt::format("{}", arg.Reg());
91 case Type::Pred:
92 return fmt::format("{}", arg.Pred());
93 case Type::Attribute:
94 return fmt::format("{}", arg.Attribute());
95 default:
96 return "<unknown immediate type>";
97 }
98}
99
100std::string DumpBlock(const Block& block) {
101 size_t inst_index{0};
102 std::map<const Inst*, size_t> inst_to_index;
103 return DumpBlock(block, {}, inst_to_index, inst_index);
104}
105
106std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
107 std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
108 std::string ret{"Block"};
109 if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
110 ret += fmt::format(" ${}", it->second);
111 }
112 ret += '\n';
113 for (const Inst& inst : block) {
114 const Opcode op{inst.GetOpcode()};
115 ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
116 if (TypeOf(op) != Type::Void) {
117 ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
118 } else {
119 ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
120 }
121 const size_t arg_count{inst.NumArgs()};
122 for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
123 const Value arg{inst.Arg(arg_index)};
124 const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
125 ret += arg_index != 0 ? ", " : " ";
126 if (op == Opcode::Phi) {
127 ret += fmt::format("[ {}, {} ]", arg_str,
128 BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
129 } else {
130 ret += arg_str;
131 }
132 if (op != Opcode::Phi) {
133 const Type actual_type{arg.Type()};
134 const Type expected_type{ArgTypeOf(op, arg_index)};
135 if (!AreTypesCompatible(actual_type, expected_type)) {
136 ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
137 }
138 }
139 }
140 if (TypeOf(op) != Type::Void) {
141 ret += fmt::format(" (uses: {})\n", inst.UseCount());
142 } else {
143 ret += '\n';
144 }
145 }
146 return ret;
147}
148
149} // namespace Shader::IR
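DumpBlock is easiest to read next to a call site. A short sketch (illustrative only, not part of this patch), appending a no-operand instruction and dumping the result:

// Illustrative sketch, not part of this patch.
#include <string>

#include "shader_recompiler/frontend/ir/basic_block.h"

namespace Shader::IR {

std::string AppendAndDump(Block& block) {
    block.AppendNewInst(Opcode::Prologue, {}); // Prologue takes no operands
    return DumpBlock(block); // one line per instruction, formatted as in DumpBlock above
}

} // namespace Shader::IR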
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
new file mode 100644
index 000000000..7e134b4c7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -0,0 +1,185 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <initializer_list>
8#include <map>
9#include <span>
10#include <vector>
11
12#include <boost/intrusive/list.hpp>
13
14#include "common/bit_cast.h"
15#include "common/common_types.h"
16#include "shader_recompiler/frontend/ir/condition.h"
17#include "shader_recompiler/frontend/ir/value.h"
18#include "shader_recompiler/object_pool.h"
19
20namespace Shader::IR {
21
22class Block {
23public:
24 using InstructionList = boost::intrusive::list<Inst>;
25 using size_type = InstructionList::size_type;
26 using iterator = InstructionList::iterator;
27 using const_iterator = InstructionList::const_iterator;
28 using reverse_iterator = InstructionList::reverse_iterator;
29 using const_reverse_iterator = InstructionList::const_reverse_iterator;
30
31 explicit Block(ObjectPool<Inst>& inst_pool_);
32 ~Block();
33
34 Block(const Block&) = delete;
35 Block& operator=(const Block&) = delete;
36
37 Block(Block&&) = default;
38 Block& operator=(Block&&) = default;
39
40 /// Appends a new instruction to the end of this basic block.
41 void AppendNewInst(Opcode op, std::initializer_list<Value> args);
42
43 /// Prepends a new instruction to this basic block before the insertion point.
44 iterator PrependNewInst(iterator insertion_point, Opcode op,
45 std::initializer_list<Value> args = {}, u32 flags = 0);
46
47 /// Adds a new branch to this basic block.
48 void AddBranch(Block* block);
49
50 /// Gets a mutable reference to the instruction list for this basic block.
51 [[nodiscard]] InstructionList& Instructions() noexcept {
52 return instructions;
53 }
54 /// Gets an immutable reference to the instruction list for this basic block.
55 [[nodiscard]] const InstructionList& Instructions() const noexcept {
56 return instructions;
57 }
58
59 /// Gets an immutable span of the immediate predecessors.
60 [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
61 return imm_predecessors;
62 }
63 /// Gets an immutable span of the immediate successors.
64 [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
65 return imm_successors;
66 }
67
68 /// Intrusively store the host definition of this block.
69 template <typename DefinitionType>
70 void SetDefinition(DefinitionType def) {
71 definition = Common::BitCast<u32>(def);
72 }
73
74 /// Return the intrusively stored host definition of this block.
75 template <typename DefinitionType>
76 [[nodiscard]] DefinitionType Definition() const noexcept {
77 return Common::BitCast<DefinitionType>(definition);
78 }
79
80 void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
81 ssa_reg_values[RegIndex(reg)] = value;
82 }
83 const Value& SsaRegValue(IR::Reg reg) const noexcept {
84 return ssa_reg_values[RegIndex(reg)];
85 }
86
87 void SsaSeal() noexcept {
88 is_ssa_sealed = true;
89 }
90 [[nodiscard]] bool IsSsaSealed() const noexcept {
91 return is_ssa_sealed;
92 }
93
94 [[nodiscard]] bool empty() const {
95 return instructions.empty();
96 }
97 [[nodiscard]] size_type size() const {
98 return instructions.size();
99 }
100
101 [[nodiscard]] Inst& front() {
102 return instructions.front();
103 }
104 [[nodiscard]] const Inst& front() const {
105 return instructions.front();
106 }
107
108 [[nodiscard]] Inst& back() {
109 return instructions.back();
110 }
111 [[nodiscard]] const Inst& back() const {
112 return instructions.back();
113 }
114
115 [[nodiscard]] iterator begin() {
116 return instructions.begin();
117 }
118 [[nodiscard]] const_iterator begin() const {
119 return instructions.begin();
120 }
121 [[nodiscard]] iterator end() {
122 return instructions.end();
123 }
124 [[nodiscard]] const_iterator end() const {
125 return instructions.end();
126 }
127
128 [[nodiscard]] reverse_iterator rbegin() {
129 return instructions.rbegin();
130 }
131 [[nodiscard]] const_reverse_iterator rbegin() const {
132 return instructions.rbegin();
133 }
134 [[nodiscard]] reverse_iterator rend() {
135 return instructions.rend();
136 }
137 [[nodiscard]] const_reverse_iterator rend() const {
138 return instructions.rend();
139 }
140
141 [[nodiscard]] const_iterator cbegin() const {
142 return instructions.cbegin();
143 }
144 [[nodiscard]] const_iterator cend() const {
145 return instructions.cend();
146 }
147
148 [[nodiscard]] const_reverse_iterator crbegin() const {
149 return instructions.crbegin();
150 }
151 [[nodiscard]] const_reverse_iterator crend() const {
152 return instructions.crend();
153 }
154
155private:
156 /// Memory pool for instruction list
157 ObjectPool<Inst>* inst_pool;
158
159 /// List of instructions in this block
160 InstructionList instructions;
161
162 /// Block immediate predecessors
163 std::vector<Block*> imm_predecessors;
164 /// Block immediate successors
165 std::vector<Block*> imm_successors;
166
167 /// Intrusively store the value of a register in the block.
168 std::array<Value, NUM_REGS> ssa_reg_values;
169 /// Intrusively store if the block is sealed in the SSA pass.
170 bool is_ssa_sealed{false};
171
172 /// Intrusively stored host definition of this block.
173 u32 definition{};
174};
175
176using BlockList = std::vector<Block*>;
177
178[[nodiscard]] std::string DumpBlock(const Block& block);
179
180[[nodiscard]] std::string DumpBlock(const Block& block,
181 const std::map<const Block*, size_t>& block_to_index,
182 std::map<const Inst*, size_t>& inst_to_index,
183 size_t& inst_index);
184
185} // namespace Shader::IR
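SetDefinition and Definition round-trip an arbitrary 32-bit host handle through Common::BitCast, letting a backend tag each block with its own bookkeeping. A small sketch (illustrative only, not part of this patch; HostLabel is a hypothetical backend type, sized to exactly 32 bits as BitCast requires):

// Illustrative sketch, not part of this patch. HostLabel is hypothetical.
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/basic_block.h"

struct HostLabel {
    u32 code_offset; // exactly 32 bits wide, matching Block's u32 definition field
};

u32 ReadBackOffset(Shader::IR::Block& block) {
    block.SetDefinition(HostLabel{.code_offset = 0x40});
    return block.Definition<HostLabel>().code_offset; // reads back 0x40
}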
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h
new file mode 100644
index 000000000..a52ccbd58
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h
@@ -0,0 +1,56 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <type_traits>
9#include <queue>
10
11#include <boost/container/small_vector.hpp>
12
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17template <typename Pred>
18auto BreadthFirstSearch(const Value& value, Pred&& pred)
19 -> std::invoke_result_t<Pred, const Inst*> {
20 if (value.IsImmediate()) {
21 // Nothing to do with immediates
22 return std::nullopt;
23 }
24 // Breadth-first search visiting the rightmost arguments first
25 // The small vector capacity was determined from shaders in Super Smash Bros. Ultimate
26 boost::container::small_vector<const Inst*, 2> visited;
27 std::queue<const Inst*> queue;
28 queue.push(value.InstRecursive());
29
30 while (!queue.empty()) {
31 // Pop one instruction from the queue
32 const Inst* const inst{queue.front()};
33 queue.pop();
34 if (const std::optional result = pred(inst)) {
35 // This is the instruction we were looking for
36 return result;
37 }
38 // Visit the rightmost arguments first
39 for (size_t arg = inst->NumArgs(); arg--;) {
40 const Value arg_value{inst->Arg(arg)};
41 if (arg_value.IsImmediate()) {
42 continue;
43 }
44 // Queue instruction if it hasn't been visited
45 const Inst* const arg_inst{arg_value.InstRecursive()};
46 if (std::ranges::find(visited, arg_inst) == visited.end()) {
47 visited.push_back(arg_inst);
48 queue.push(arg_inst);
49 }
50 }
51 }
52 // SSA tree has been traversed and the result hasn't been found
53 return std::nullopt;
54}
55
56} // namespace Shader::IR
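The helper is generic over the predicate's result: the walk stops at the first instruction for which the predicate returns a populated optional. A short sketch (illustrative only, not part of this patch), mirroring how later passes trace a value back to its producer; it assumes GetAttribute carries its attribute in the first operand, as the IR emitter below constructs it:

// Illustrative sketch, not part of this patch.
#include <optional>

#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::IR {

std::optional<Attribute> SourceAttribute(const Value& value) {
    const auto pred{[](const Inst* inst) -> std::optional<Attribute> {
        if (inst->GetOpcode() == Opcode::GetAttribute) {
            return inst->Arg(0).Attribute(); // the first operand holds the attribute
        }
        return std::nullopt; // keep walking through the operands
    }};
    return BreadthFirstSearch(value, pred);
}

} // namespace Shader::IR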
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
new file mode 100644
index 000000000..fc18ea2a2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -0,0 +1,29 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "shader_recompiler/frontend/ir/condition.h"
10
11namespace Shader::IR {
12
13std::string NameOf(Condition condition) {
14 std::string ret;
15 if (condition.GetFlowTest() != FlowTest::T) {
16 ret = fmt::to_string(condition.GetFlowTest());
17 }
18 const auto [pred, negated]{condition.GetPred()};
19 if (!ret.empty()) {
20 ret += '&';
21 }
22 if (negated) {
23 ret += '!';
24 }
25 ret += fmt::to_string(pred);
26 return ret;
27}
28
29} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
new file mode 100644
index 000000000..aa8597c60
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <string>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "shader_recompiler/frontend/ir/flow_test.h"
14#include "shader_recompiler/frontend/ir/pred.h"
15
16namespace Shader::IR {
17
18class Condition {
19public:
20 Condition() noexcept = default;
21
22 explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
23 : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
24 pred_negated{pred_negated_ ? u8{1} : u8{0}} {}
25
26 explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
27 : Condition(FlowTest::T, pred_, pred_negated_) {}
28
29 explicit Condition(bool value) : Condition(Pred::PT, !value) {}
30
31 auto operator<=>(const Condition&) const noexcept = default;
32
33 [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
34 return static_cast<IR::FlowTest>(flow_test);
35 }
36
37 [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
38 return {static_cast<IR::Pred>(pred), pred_negated != 0};
39 }
40
41private:
42 u16 flow_test;
43 u8 pred;
44 u8 pred_negated;
45};
46
47std::string NameOf(Condition condition);
48
49} // namespace Shader::IR
50
51template <>
52struct fmt::formatter<Shader::IR::Condition> {
53 constexpr auto parse(format_parse_context& ctx) {
54 return ctx.begin();
55 }
56 template <typename FormatContext>
57 auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
58 return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
59 }
60};
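Condition packs a flow test together with a possibly negated predicate into a small comparable value, and the formatter above prints it through NameOf. A short sketch (illustrative only, not part of this patch; it assumes Pred has its own fmt formatter, which the block dumper in basic_block.cpp already relies on):

// Illustrative sketch, not part of this patch.
#include <string>

#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/condition.h"

std::string DescribeCondition() {
    const Shader::IR::Condition cond{Shader::IR::FlowTest::NE, Shader::IR::Pred::PT, true};
    return fmt::to_string(cond); // NameOf composes this as "NE&!PT"
}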
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp
new file mode 100644
index 000000000..6ebb4ad89
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.cpp
@@ -0,0 +1,83 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "shader_recompiler/frontend/ir/flow_test.h"
10
11namespace Shader::IR {
12
13std::string NameOf(FlowTest flow_test) {
14 switch (flow_test) {
15 case FlowTest::F:
16 return "F";
17 case FlowTest::LT:
18 return "LT";
19 case FlowTest::EQ:
20 return "EQ";
21 case FlowTest::LE:
22 return "LE";
23 case FlowTest::GT:
24 return "GT";
25 case FlowTest::NE:
26 return "NE";
27 case FlowTest::GE:
28 return "GE";
29 case FlowTest::NUM:
30 return "NUM";
31 case FlowTest::NaN:
32 return "NAN";
33 case FlowTest::LTU:
34 return "LTU";
35 case FlowTest::EQU:
36 return "EQU";
37 case FlowTest::LEU:
38 return "LEU";
39 case FlowTest::GTU:
40 return "GTU";
41 case FlowTest::NEU:
42 return "NEU";
43 case FlowTest::GEU:
44 return "GEU";
45 case FlowTest::T:
46 return "T";
47 case FlowTest::OFF:
48 return "OFF";
49 case FlowTest::LO:
50 return "LO";
51 case FlowTest::SFF:
52 return "SFF";
53 case FlowTest::LS:
54 return "LS";
55 case FlowTest::HI:
56 return "HI";
57 case FlowTest::SFT:
58 return "SFT";
59 case FlowTest::HS:
60 return "HS";
61 case FlowTest::OFT:
62 return "OFT";
63 case FlowTest::CSM_TA:
64 return "CSM_TA";
65 case FlowTest::CSM_TR:
66 return "CSM_TR";
67 case FlowTest::CSM_MX:
68 return "CSM_MX";
69 case FlowTest::FCSM_TA:
70 return "FCSM_TA";
71 case FlowTest::FCSM_TR:
72 return "FCSM_TR";
73 case FlowTest::FCSM_MX:
74 return "FCSM_MX";
75 case FlowTest::RLE:
76 return "RLE";
77 case FlowTest::RGT:
78 return "RGT";
79 }
80 return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
81}
82
83} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h
new file mode 100644
index 000000000..09e113773
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.h
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <fmt/format.h>
9
10#include "common/common_types.h"
11
12namespace Shader::IR {
13
14enum class FlowTest : u64 {
15 F,
16 LT,
17 EQ,
18 LE,
19 GT,
20 NE,
21 GE,
22 NUM,
23 NaN,
24 LTU,
25 EQU,
26 LEU,
27 GTU,
28 NEU,
29 GEU,
30 T,
31 OFF,
32 LO,
33 SFF,
34 LS,
35 HI,
36 SFT,
37 HS,
38 OFT,
39 CSM_TA,
40 CSM_TR,
41 CSM_MX,
42 FCSM_TA,
43 FCSM_TR,
44 FCSM_MX,
45 RLE,
46 RGT,
47};
48
49[[nodiscard]] std::string NameOf(FlowTest flow_test);
50
51} // namespace Shader::IR
52
53template <>
54struct fmt::formatter<Shader::IR::FlowTest> {
55 constexpr auto parse(format_parse_context& ctx) {
56 return ctx.begin();
57 }
58 template <typename FormatContext>
59 auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
60 return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
61 }
62};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
new file mode 100644
index 000000000..13159a68d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -0,0 +1,2017 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_cast.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::IR {
10namespace {
11[[noreturn]] void ThrowInvalidType(Type type) {
12 throw InvalidArgument("Invalid type {}", type);
13}
14
15Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
16 if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
17 return ir.CompositeConstruct(bias_lod, lod_clamp);
18 } else if (!bias_lod.IsEmpty()) {
19 return bias_lod;
20 } else if (!lod_clamp.IsEmpty()) {
21 return lod_clamp;
22 } else {
23 return Value{};
24 }
25}
26} // Anonymous namespace
27
28U1 IREmitter::Imm1(bool value) const {
29 return U1{Value{value}};
30}
31
32U8 IREmitter::Imm8(u8 value) const {
33 return U8{Value{value}};
34}
35
36U16 IREmitter::Imm16(u16 value) const {
37 return U16{Value{value}};
38}
39
40U32 IREmitter::Imm32(u32 value) const {
41 return U32{Value{value}};
42}
43
44U32 IREmitter::Imm32(s32 value) const {
45 return U32{Value{static_cast<u32>(value)}};
46}
47
48F32 IREmitter::Imm32(f32 value) const {
49 return F32{Value{value}};
50}
51
52U64 IREmitter::Imm64(u64 value) const {
53 return U64{Value{value}};
54}
55
56U64 IREmitter::Imm64(s64 value) const {
57 return U64{Value{static_cast<u64>(value)}};
58}
59
60F64 IREmitter::Imm64(f64 value) const {
61 return F64{Value{value}};
62}
63
64U1 IREmitter::ConditionRef(const U1& value) {
65 return Inst<U1>(Opcode::ConditionRef, value);
66}
67
68void IREmitter::Reference(const Value& value) {
69 Inst(Opcode::Reference, value);
70}
71
72void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
73 Inst(Opcode::PhiMove, Value{&phi}, value);
74}
75
76void IREmitter::Prologue() {
77 Inst(Opcode::Prologue);
78}
79
80void IREmitter::Epilogue() {
81 Inst(Opcode::Epilogue);
82}
83
84void IREmitter::DemoteToHelperInvocation() {
85 Inst(Opcode::DemoteToHelperInvocation);
86}
87
88void IREmitter::EmitVertex(const U32& stream) {
89 Inst(Opcode::EmitVertex, stream);
90}
91
92void IREmitter::EndPrimitive(const U32& stream) {
93 Inst(Opcode::EndPrimitive, stream);
94}
95
96void IREmitter::Barrier() {
97 Inst(Opcode::Barrier);
98}
99
100void IREmitter::WorkgroupMemoryBarrier() {
101 Inst(Opcode::WorkgroupMemoryBarrier);
102}
103
104void IREmitter::DeviceMemoryBarrier() {
105 Inst(Opcode::DeviceMemoryBarrier);
106}
107
108U32 IREmitter::GetReg(IR::Reg reg) {
109 return Inst<U32>(Opcode::GetRegister, reg);
110}
111
112void IREmitter::SetReg(IR::Reg reg, const U32& value) {
113 Inst(Opcode::SetRegister, reg, value);
114}
115
116U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
117 if (pred == Pred::PT) {
118 return Imm1(!is_negated);
119 }
120 const U1 value{Inst<U1>(Opcode::GetPred, pred)};
121 if (is_negated) {
122 return Inst<U1>(Opcode::LogicalNot, value);
123 } else {
124 return value;
125 }
126}
127
128void IREmitter::SetPred(IR::Pred pred, const U1& value) {
129 if (pred != IR::Pred::PT) {
130 Inst(Opcode::SetPred, pred, value);
131 }
132}
133
134U1 IREmitter::GetGotoVariable(u32 id) {
135 return Inst<U1>(Opcode::GetGotoVariable, id);
136}
137
138void IREmitter::SetGotoVariable(u32 id, const U1& value) {
139 Inst(Opcode::SetGotoVariable, id, value);
140}
141
142U32 IREmitter::GetIndirectBranchVariable() {
143 return Inst<U32>(Opcode::GetIndirectBranchVariable);
144}
145
146void IREmitter::SetIndirectBranchVariable(const U32& value) {
147 Inst(Opcode::SetIndirectBranchVariable, value);
148}
149
150U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
151 return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
152}
153
154Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
155 bool is_signed) {
156 switch (bitsize) {
157 case 8:
158 return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset);
159 case 16:
160 return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset);
161 case 32:
162 return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
163 case 64:
164 return Inst(Opcode::GetCbufU32x2, binding, byte_offset);
165 default:
166 throw InvalidArgument("Invalid bit size {}", bitsize);
167 }
168}
169
170F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) {
171 return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset);
172}
173
174U1 IREmitter::GetZFlag() {
175 return Inst<U1>(Opcode::GetZFlag);
176}
177
178U1 IREmitter::GetSFlag() {
179 return Inst<U1>(Opcode::GetSFlag);
180}
181
182U1 IREmitter::GetCFlag() {
183 return Inst<U1>(Opcode::GetCFlag);
184}
185
186U1 IREmitter::GetOFlag() {
187 return Inst<U1>(Opcode::GetOFlag);
188}
189
190void IREmitter::SetZFlag(const U1& value) {
191 Inst(Opcode::SetZFlag, value);
192}
193
194void IREmitter::SetSFlag(const U1& value) {
195 Inst(Opcode::SetSFlag, value);
196}
197
198void IREmitter::SetCFlag(const U1& value) {
199 Inst(Opcode::SetCFlag, value);
200}
201
202void IREmitter::SetOFlag(const U1& value) {
203 Inst(Opcode::SetOFlag, value);
204}
205
206static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
207 switch (flow_test) {
208 case FlowTest::F:
209 return ir.Imm1(false);
210 case FlowTest::LT:
211 return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())),
212 ir.GetOFlag());
213 case FlowTest::EQ:
214 return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag());
215 case FlowTest::LE:
216 return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
217 case FlowTest::GT:
218 return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()),
219 ir.LogicalNot(ir.GetZFlag()));
220 case FlowTest::NE:
221 return ir.LogicalNot(ir.GetZFlag());
222 case FlowTest::GE:
223 return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()));
224 case FlowTest::NUM:
225 return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
226 case FlowTest::NaN:
227 return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
228 case FlowTest::LTU:
229 return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag());
230 case FlowTest::EQU:
231 return ir.GetZFlag();
232 case FlowTest::LEU:
233 return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag());
234 case FlowTest::GTU:
235 return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()),
236 ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
237 case FlowTest::NEU:
238 return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag()));
239 case FlowTest::GEU:
240 return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()),
241 ir.GetOFlag());
242 case FlowTest::T:
243 return ir.Imm1(true);
244 case FlowTest::OFF:
245 return ir.LogicalNot(ir.GetOFlag());
246 case FlowTest::LO:
247 return ir.LogicalNot(ir.GetCFlag());
248 case FlowTest::SFF:
249 return ir.LogicalNot(ir.GetSFlag());
250 case FlowTest::LS:
251 return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag()));
252 case FlowTest::HI:
253 return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag()));
254 case FlowTest::SFT:
255 return ir.GetSFlag();
256 case FlowTest::HS:
257 return ir.GetCFlag();
258 case FlowTest::OFT:
259 return ir.GetOFlag();
260 case FlowTest::RLE:
261 return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag());
262 case FlowTest::RGT:
263 return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
264 case FlowTest::FCSM_TR:
265 LOG_WARNING(Shader, "(STUBBED) FCSM_TR");
266 return ir.Imm1(false);
267 case FlowTest::CSM_TA:
268 case FlowTest::CSM_TR:
269 case FlowTest::CSM_MX:
270 case FlowTest::FCSM_TA:
271 case FlowTest::FCSM_MX:
272 default:
273 throw NotImplementedException("Flow test {}", flow_test);
274 }
275}
276
277U1 IREmitter::Condition(IR::Condition cond) {
278 const FlowTest flow_test{cond.GetFlowTest()};
279 const auto [pred, is_negated]{cond.GetPred()};
280 if (flow_test == FlowTest::T) {
281 return GetPred(pred, is_negated);
282 }
283 return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
284}
285
286U1 IREmitter::GetFlowTestResult(FlowTest test) {
287 return GetFlowTest(*this, test);
288}
289
290F32 IREmitter::GetAttribute(IR::Attribute attribute) {
291 return GetAttribute(attribute, Imm32(0));
292}
293
294F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
295 return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
296}
297
298void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
299 Inst(Opcode::SetAttribute, attribute, value, vertex);
300}
301
302F32 IREmitter::GetAttributeIndexed(const U32& phys_address) {
303 return GetAttributeIndexed(phys_address, Imm32(0));
304}
305
306F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) {
307 return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);
308}
309
310void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) {
311 Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
312}
313
314F32 IREmitter::GetPatch(Patch patch) {
315 return Inst<F32>(Opcode::GetPatch, patch);
316}
317
318void IREmitter::SetPatch(Patch patch, const F32& value) {
319 Inst(Opcode::SetPatch, patch, value);
320}
321
322void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
323 Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
324}
325
326void IREmitter::SetSampleMask(const U32& value) {
327 Inst(Opcode::SetSampleMask, value);
328}
329
330void IREmitter::SetFragDepth(const F32& value) {
331 Inst(Opcode::SetFragDepth, value);
332}
333
334U32 IREmitter::WorkgroupIdX() {
335 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
336}
337
338U32 IREmitter::WorkgroupIdY() {
339 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)};
340}
341
342U32 IREmitter::WorkgroupIdZ() {
343 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
344}
345
346Value IREmitter::LocalInvocationId() {
347 return Inst(Opcode::LocalInvocationId);
348}
349
350U32 IREmitter::LocalInvocationIdX() {
351 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
352}
353
354U32 IREmitter::LocalInvocationIdY() {
355 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)};
356}
357
358U32 IREmitter::LocalInvocationIdZ() {
359 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
360}
361
362U32 IREmitter::InvocationId() {
363 return Inst<U32>(Opcode::InvocationId);
364}
365
366U32 IREmitter::SampleId() {
367 return Inst<U32>(Opcode::SampleId);
368}
369
370U1 IREmitter::IsHelperInvocation() {
371 return Inst<U1>(Opcode::IsHelperInvocation);
372}
373
374F32 IREmitter::YDirection() {
375 return Inst<F32>(Opcode::YDirection);
376}
377
378U32 IREmitter::LaneId() {
379 return Inst<U32>(Opcode::LaneId);
380}
381
382U32 IREmitter::LoadGlobalU8(const U64& address) {
383 return Inst<U32>(Opcode::LoadGlobalU8, address);
384}
385
386U32 IREmitter::LoadGlobalS8(const U64& address) {
387 return Inst<U32>(Opcode::LoadGlobalS8, address);
388}
389
390U32 IREmitter::LoadGlobalU16(const U64& address) {
391 return Inst<U32>(Opcode::LoadGlobalU16, address);
392}
393
394U32 IREmitter::LoadGlobalS16(const U64& address) {
395 return Inst<U32>(Opcode::LoadGlobalS16, address);
396}
397
398U32 IREmitter::LoadGlobal32(const U64& address) {
399 return Inst<U32>(Opcode::LoadGlobal32, address);
400}
401
402Value IREmitter::LoadGlobal64(const U64& address) {
403 return Inst<Value>(Opcode::LoadGlobal64, address);
404}
405
406Value IREmitter::LoadGlobal128(const U64& address) {
407 return Inst<Value>(Opcode::LoadGlobal128, address);
408}
409
410void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
411 Inst(Opcode::WriteGlobalU8, address, value);
412}
413
414void IREmitter::WriteGlobalS8(const U64& address, const U32& value) {
415 Inst(Opcode::WriteGlobalS8, address, value);
416}
417
418void IREmitter::WriteGlobalU16(const U64& address, const U32& value) {
419 Inst(Opcode::WriteGlobalU16, address, value);
420}
421
422void IREmitter::WriteGlobalS16(const U64& address, const U32& value) {
423 Inst(Opcode::WriteGlobalS16, address, value);
424}
425
426void IREmitter::WriteGlobal32(const U64& address, const U32& value) {
427 Inst(Opcode::WriteGlobal32, address, value);
428}
429
430void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) {
431 Inst(Opcode::WriteGlobal64, address, vector);
432}
433
434void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
435 Inst(Opcode::WriteGlobal128, address, vector);
436}
437
438U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
439 return Inst<U32>(Opcode::LoadLocal, word_offset);
440}
441
442void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
443 Inst(Opcode::WriteLocal, word_offset, value);
444}
445
446Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
447 switch (bit_size) {
448 case 8:
449 return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
450 case 16:
451 return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
452 case 32:
453 return Inst(Opcode::LoadSharedU32, offset);
454 case 64:
455 return Inst(Opcode::LoadSharedU64, offset);
456 case 128:
457 return Inst(Opcode::LoadSharedU128, offset);
458 }
459 throw InvalidArgument("Invalid bit size {}", bit_size);
460}
461
462void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
463 switch (bit_size) {
464 case 8:
465 Inst(Opcode::WriteSharedU8, offset, value);
466 break;
467 case 16:
468 Inst(Opcode::WriteSharedU16, offset, value);
469 break;
470 case 32:
471 Inst(Opcode::WriteSharedU32, offset, value);
472 break;
473 case 64:
474 Inst(Opcode::WriteSharedU64, offset, value);
475 break;
476 case 128:
477 Inst(Opcode::WriteSharedU128, offset, value);
478 break;
479 default:
480 throw InvalidArgument("Invalid bit size {}", bit_size);
481 }
482}
483
484U1 IREmitter::GetZeroFromOp(const Value& op) {
485 return Inst<U1>(Opcode::GetZeroFromOp, op);
486}
487
488U1 IREmitter::GetSignFromOp(const Value& op) {
489 return Inst<U1>(Opcode::GetSignFromOp, op);
490}
491
492U1 IREmitter::GetCarryFromOp(const Value& op) {
493 return Inst<U1>(Opcode::GetCarryFromOp, op);
494}
495
496U1 IREmitter::GetOverflowFromOp(const Value& op) {
497 return Inst<U1>(Opcode::GetOverflowFromOp, op);
498}
499
500U1 IREmitter::GetSparseFromOp(const Value& op) {
501 return Inst<U1>(Opcode::GetSparseFromOp, op);
502}
503
504U1 IREmitter::GetInBoundsFromOp(const Value& op) {
505 return Inst<U1>(Opcode::GetInBoundsFromOp, op);
506}
507
508F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
509 if (a.Type() != b.Type()) {
510 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
511 }
512 switch (a.Type()) {
513 case Type::F16:
514 return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
515 case Type::F32:
516 return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
517 case Type::F64:
518 return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
519 default:
520 ThrowInvalidType(a.Type());
521 }
522}
523
524Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
525 if (e1.Type() != e2.Type()) {
526 throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
527 }
528 switch (e1.Type()) {
529 case Type::U32:
530 return Inst(Opcode::CompositeConstructU32x2, e1, e2);
531 case Type::F16:
532 return Inst(Opcode::CompositeConstructF16x2, e1, e2);
533 case Type::F32:
534 return Inst(Opcode::CompositeConstructF32x2, e1, e2);
535 case Type::F64:
536 return Inst(Opcode::CompositeConstructF64x2, e1, e2);
537 default:
538 ThrowInvalidType(e1.Type());
539 }
540}
541
542Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
543 if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
544 throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
545 }
546 switch (e1.Type()) {
547 case Type::U32:
548 return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
549 case Type::F16:
550 return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
551 case Type::F32:
552 return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
553 case Type::F64:
554 return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
555 default:
556 ThrowInvalidType(e1.Type());
557 }
558}
559
560Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
561 const Value& e4) {
562 if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
563 throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
564 e3.Type(), e4.Type());
565 }
566 switch (e1.Type()) {
567 case Type::U32:
568 return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
569 case Type::F16:
570 return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
571 case Type::F32:
572 return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
573 case Type::F64:
574 return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
575 default:
576 ThrowInvalidType(e1.Type());
577 }
578}
579
580Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
581 const auto read{[&](Opcode opcode, size_t limit) -> Value {
582 if (element >= limit) {
583 throw InvalidArgument("Out of bounds element {}", element);
584 }
585 return Inst(opcode, vector, Value{static_cast<u32>(element)});
586 }};
587 switch (vector.Type()) {
588 case Type::U32x2:
589 return read(Opcode::CompositeExtractU32x2, 2);
590 case Type::U32x3:
591 return read(Opcode::CompositeExtractU32x3, 3);
592 case Type::U32x4:
593 return read(Opcode::CompositeExtractU32x4, 4);
594 case Type::F16x2:
595 return read(Opcode::CompositeExtractF16x2, 2);
596 case Type::F16x3:
597 return read(Opcode::CompositeExtractF16x3, 3);
598 case Type::F16x4:
599 return read(Opcode::CompositeExtractF16x4, 4);
600 case Type::F32x2:
601 return read(Opcode::CompositeExtractF32x2, 2);
602 case Type::F32x3:
603 return read(Opcode::CompositeExtractF32x3, 3);
604 case Type::F32x4:
605 return read(Opcode::CompositeExtractF32x4, 4);
606 case Type::F64x2:
607 return read(Opcode::CompositeExtractF64x2, 2);
608 case Type::F64x3:
609 return read(Opcode::CompositeExtractF64x3, 3);
610 case Type::F64x4:
611 return read(Opcode::CompositeExtractF64x4, 4);
612 default:
613 ThrowInvalidType(vector.Type());
614 }
615}
616
617Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
618 const auto insert{[&](Opcode opcode, size_t limit) {
619 if (element >= limit) {
620 throw InvalidArgument("Out of bounds element {}", element);
621 }
622 return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
623 }};
624 switch (vector.Type()) {
625 case Type::U32x2:
626 return insert(Opcode::CompositeInsertU32x2, 2);
627 case Type::U32x3:
628 return insert(Opcode::CompositeInsertU32x3, 3);
629 case Type::U32x4:
630 return insert(Opcode::CompositeInsertU32x4, 4);
631 case Type::F16x2:
632 return insert(Opcode::CompositeInsertF16x2, 2);
633 case Type::F16x3:
634 return insert(Opcode::CompositeInsertF16x3, 3);
635 case Type::F16x4:
636 return insert(Opcode::CompositeInsertF16x4, 4);
637 case Type::F32x2:
638 return insert(Opcode::CompositeInsertF32x2, 2);
639 case Type::F32x3:
640 return insert(Opcode::CompositeInsertF32x3, 3);
641 case Type::F32x4:
642 return insert(Opcode::CompositeInsertF32x4, 4);
643 case Type::F64x2:
644 return insert(Opcode::CompositeInsertF64x2, 2);
645 case Type::F64x3:
646 return insert(Opcode::CompositeInsertF64x3, 3);
647 case Type::F64x4:
648 return insert(Opcode::CompositeInsertF64x4, 4);
649 default:
650 ThrowInvalidType(vector.Type());
651 }
652}
653
654Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
655 if (true_value.Type() != false_value.Type()) {
656 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
657 }
658 switch (true_value.Type()) {
659 case Type::U1:
660 return Inst(Opcode::SelectU1, condition, true_value, false_value);
661 case Type::U8:
662 return Inst(Opcode::SelectU8, condition, true_value, false_value);
663 case Type::U16:
664 return Inst(Opcode::SelectU16, condition, true_value, false_value);
665 case Type::U32:
666 return Inst(Opcode::SelectU32, condition, true_value, false_value);
667 case Type::U64:
668 return Inst(Opcode::SelectU64, condition, true_value, false_value);
669 case Type::F32:
670 return Inst(Opcode::SelectF32, condition, true_value, false_value);
671 case Type::F64:
672 return Inst(Opcode::SelectF64, condition, true_value, false_value);
673 default:
674 throw InvalidArgument("Invalid type {}", true_value.Type());
675 }
676}
677
678template <>
679IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
680 return Inst<IR::U32>(Opcode::BitCastU32F32, value);
681}
682
683template <>
684IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
685 return Inst<IR::F32>(Opcode::BitCastF32U32, value);
686}
687
688template <>
689IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
690 return Inst<IR::U16>(Opcode::BitCastU16F16, value);
691}
692
693template <>
694IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
695 return Inst<IR::F16>(Opcode::BitCastF16U16, value);
696}
697
698template <>
699IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
700 return Inst<IR::U64>(Opcode::BitCastU64F64, value);
701}
702
703template <>
704IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
705 return Inst<IR::F64>(Opcode::BitCastF64U64, value);
706}
707
708U64 IREmitter::PackUint2x32(const Value& vector) {
709 return Inst<U64>(Opcode::PackUint2x32, vector);
710}
711
712Value IREmitter::UnpackUint2x32(const U64& value) {
713 return Inst<Value>(Opcode::UnpackUint2x32, value);
714}
715
716U32 IREmitter::PackFloat2x16(const Value& vector) {
717 return Inst<U32>(Opcode::PackFloat2x16, vector);
718}
719
720Value IREmitter::UnpackFloat2x16(const U32& value) {
721 return Inst(Opcode::UnpackFloat2x16, value);
722}
723
724U32 IREmitter::PackHalf2x16(const Value& vector) {
725 return Inst<U32>(Opcode::PackHalf2x16, vector);
726}
727
728Value IREmitter::UnpackHalf2x16(const U32& value) {
729 return Inst(Opcode::UnpackHalf2x16, value);
730}
731
732F64 IREmitter::PackDouble2x32(const Value& vector) {
733 return Inst<F64>(Opcode::PackDouble2x32, vector);
734}
735
736Value IREmitter::UnpackDouble2x32(const F64& value) {
737 return Inst<Value>(Opcode::UnpackDouble2x32, value);
738}
739
740F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
741 if (a.Type() != b.Type()) {
742 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
743 }
744 switch (a.Type()) {
745 case Type::F16:
746 return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
747 case Type::F32:
748 return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
749 case Type::F64:
750 return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
751 default:
752 ThrowInvalidType(a.Type());
753 }
754}
755
756F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
757 FpControl control) {
758 if (a.Type() != b.Type() || a.Type() != c.Type()) {
759 throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
760 }
761 switch (a.Type()) {
762 case Type::F16:
763 return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
764 case Type::F32:
765 return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
766 case Type::F64:
767 return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
768 default:
769 ThrowInvalidType(a.Type());
770 }
771}
772
773F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
774 switch (value.Type()) {
775 case Type::F16:
776 return Inst<F16>(Opcode::FPAbs16, value);
777 case Type::F32:
778 return Inst<F32>(Opcode::FPAbs32, value);
779 case Type::F64:
780 return Inst<F64>(Opcode::FPAbs64, value);
781 default:
782 ThrowInvalidType(value.Type());
783 }
784}
785
786F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
787 switch (value.Type()) {
788 case Type::F16:
789 return Inst<F16>(Opcode::FPNeg16, value);
790 case Type::F32:
791 return Inst<F32>(Opcode::FPNeg32, value);
792 case Type::F64:
793 return Inst<F64>(Opcode::FPNeg64, value);
794 default:
795 ThrowInvalidType(value.Type());
796 }
797}
798
799F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
800 F16F32F64 result{value};
801 if (abs) {
802 result = FPAbs(result);
803 }
804 if (neg) {
805 result = FPNeg(result);
806 }
807 return result;
808}
809
810F32 IREmitter::FPCos(const F32& value) {
811 return Inst<F32>(Opcode::FPCos, value);
812}
813
814F32 IREmitter::FPSin(const F32& value) {
815 return Inst<F32>(Opcode::FPSin, value);
816}
817
818F32 IREmitter::FPExp2(const F32& value) {
819 return Inst<F32>(Opcode::FPExp2, value);
820}
821
822F32 IREmitter::FPLog2(const F32& value) {
823 return Inst<F32>(Opcode::FPLog2, value);
824}
825
826F32F64 IREmitter::FPRecip(const F32F64& value) {
827 switch (value.Type()) {
828 case Type::F32:
829 return Inst<F32>(Opcode::FPRecip32, value);
830 case Type::F64:
831 return Inst<F64>(Opcode::FPRecip64, value);
832 default:
833 ThrowInvalidType(value.Type());
834 }
835}
836
837F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
838 switch (value.Type()) {
839 case Type::F32:
840 return Inst<F32>(Opcode::FPRecipSqrt32, value);
841 case Type::F64:
842 return Inst<F64>(Opcode::FPRecipSqrt64, value);
843 default:
844 ThrowInvalidType(value.Type());
845 }
846}
847
848F32 IREmitter::FPSqrt(const F32& value) {
849 return Inst<F32>(Opcode::FPSqrt, value);
850}
851
852F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
853 switch (value.Type()) {
854 case Type::F16:
855 return Inst<F16>(Opcode::FPSaturate16, value);
856 case Type::F32:
857 return Inst<F32>(Opcode::FPSaturate32, value);
858 case Type::F64:
859 return Inst<F64>(Opcode::FPSaturate64, value);
860 default:
861 ThrowInvalidType(value.Type());
862 }
863}
864
865F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
866 const F16F32F64& max_value) {
867 if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
868 throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
869 max_value.Type());
870 }
871 switch (value.Type()) {
872 case Type::F16:
873 return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
874 case Type::F32:
875 return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
876 case Type::F64:
877 return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
878 default:
879 ThrowInvalidType(value.Type());
880 }
881}
882
883F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
884 switch (value.Type()) {
885 case Type::F16:
886 return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value);
887 case Type::F32:
888 return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value);
889 case Type::F64:
890 return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value);
891 default:
892 ThrowInvalidType(value.Type());
893 }
894}
895
896F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) {
897 switch (value.Type()) {
898 case Type::F16:
899 return Inst<F16>(Opcode::FPFloor16, Flags{control}, value);
900 case Type::F32:
901 return Inst<F32>(Opcode::FPFloor32, Flags{control}, value);
902 case Type::F64:
903 return Inst<F64>(Opcode::FPFloor64, Flags{control}, value);
904 default:
905 ThrowInvalidType(value.Type());
906 }
907}
908
909F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) {
910 switch (value.Type()) {
911 case Type::F16:
912 return Inst<F16>(Opcode::FPCeil16, Flags{control}, value);
913 case Type::F32:
914 return Inst<F32>(Opcode::FPCeil32, Flags{control}, value);
915 case Type::F64:
916 return Inst<F64>(Opcode::FPCeil64, Flags{control}, value);
917 default:
918 ThrowInvalidType(value.Type());
919 }
920}
921
922F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
923 switch (value.Type()) {
924 case Type::F16:
925 return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value);
926 case Type::F32:
927 return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value);
928 case Type::F64:
929 return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value);
930 default:
931 ThrowInvalidType(value.Type());
932 }
933}
934
935U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) {
936 if (lhs.Type() != rhs.Type()) {
937 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
938 }
939 switch (lhs.Type()) {
940 case Type::F16:
941 return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control},
942 lhs, rhs);
943 case Type::F32:
944 return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control},
945 lhs, rhs);
946 case Type::F64:
947 return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control},
948 lhs, rhs);
949 default:
950 ThrowInvalidType(lhs.Type());
951 }
952}
953
954U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
955 bool ordered) {
956 if (lhs.Type() != rhs.Type()) {
957 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
958 }
959 switch (lhs.Type()) {
960 case Type::F16:
961 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16,
962 Flags{control}, lhs, rhs);
963 case Type::F32:
964 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32,
965 Flags{control}, lhs, rhs);
966 case Type::F64:
967 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64,
968 Flags{control}, lhs, rhs);
969 default:
970 ThrowInvalidType(lhs.Type());
971 }
972}
973
974U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
975 bool ordered) {
976 if (lhs.Type() != rhs.Type()) {
977 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
978 }
979 switch (lhs.Type()) {
980 case Type::F16:
981 return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16,
982 Flags{control}, lhs, rhs);
983 case Type::F32:
984 return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32,
985 Flags{control}, lhs, rhs);
986 case Type::F64:
987 return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64,
988 Flags{control}, lhs, rhs);
989 default:
990 ThrowInvalidType(lhs.Type());
991 }
992}
993
994U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
995 bool ordered) {
996 if (lhs.Type() != rhs.Type()) {
997 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
998 }
999 switch (lhs.Type()) {
1000 case Type::F16:
1001 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16,
1002 Flags{control}, lhs, rhs);
1003 case Type::F32:
1004 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32,
1005 Flags{control}, lhs, rhs);
1006 case Type::F64:
1007 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64,
1008 Flags{control}, lhs, rhs);
1009 default:
1010 ThrowInvalidType(lhs.Type());
1011 }
1012}
1013
1014U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
1015 bool ordered) {
1016 if (lhs.Type() != rhs.Type()) {
1017 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1018 }
1019 switch (lhs.Type()) {
1020 case Type::F16:
1021 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
1022 Flags{control}, lhs, rhs);
1023 case Type::F32:
1024 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
1025 Flags{control}, lhs, rhs);
1026 case Type::F64:
1027 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
1028 Flags{control}, lhs, rhs);
1029 default:
1030 ThrowInvalidType(lhs.Type());
1031 }
1032}
1033
1034U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
1035 bool ordered) {
1036 if (lhs.Type() != rhs.Type()) {
1037 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1038 }
1039 switch (lhs.Type()) {
1040 case Type::F16:
1041 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
1042 : Opcode::FPUnordGreaterThanEqual16,
1043 Flags{control}, lhs, rhs);
1044 case Type::F32:
1045 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
1046 : Opcode::FPUnordGreaterThanEqual32,
1047 Flags{control}, lhs, rhs);
1048 case Type::F64:
1049 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
1050 : Opcode::FPUnordGreaterThanEqual64,
1051 Flags{control}, lhs, rhs);
1052 default:
1053 ThrowInvalidType(lhs.Type());
1054 }
1055}
1056
1057U1 IREmitter::FPIsNan(const F16F32F64& value) {
1058 switch (value.Type()) {
1059 case Type::F16:
1060 return Inst<U1>(Opcode::FPIsNan16, value);
1061 case Type::F32:
1062 return Inst<U1>(Opcode::FPIsNan32, value);
1063 case Type::F64:
1064 return Inst<U1>(Opcode::FPIsNan64, value);
1065 default:
1066 ThrowInvalidType(value.Type());
1067 }
1068}
1069
1070U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
1071 if (lhs.Type() != rhs.Type()) {
1072 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1073 }
1074 return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
1075}
1076
1077U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
1078 if (lhs.Type() != rhs.Type()) {
1079 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1080 }
1081 return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
1082}
1083
1084F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) {
1085 if (lhs.Type() != rhs.Type()) {
1086 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1087 }
1088 switch (lhs.Type()) {
1089 case Type::F32:
1090 return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs);
1091 case Type::F64:
1092 return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs);
1093 default:
1094 ThrowInvalidType(lhs.Type());
1095 }
1096}
1097
1098F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) {
1099 if (lhs.Type() != rhs.Type()) {
1100 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1101 }
1102 switch (lhs.Type()) {
1103 case Type::F32:
1104 return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs);
1105 case Type::F64:
1106 return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs);
1107 default:
1108 ThrowInvalidType(lhs.Type());
1109 }
1110}
1111
1112U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
1113 if (a.Type() != b.Type()) {
1114 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
1115 }
1116 switch (a.Type()) {
1117 case Type::U32:
1118 return Inst<U32>(Opcode::IAdd32, a, b);
1119 case Type::U64:
1120 return Inst<U64>(Opcode::IAdd64, a, b);
1121 default:
1122 ThrowInvalidType(a.Type());
1123 }
1124}
1125
1126U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
1127 if (a.Type() != b.Type()) {
1128 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
1129 }
1130 switch (a.Type()) {
1131 case Type::U32:
1132 return Inst<U32>(Opcode::ISub32, a, b);
1133 case Type::U64:
1134 return Inst<U64>(Opcode::ISub64, a, b);
1135 default:
1136 ThrowInvalidType(a.Type());
1137 }
1138}
1139
1140U32 IREmitter::IMul(const U32& a, const U32& b) {
1141 return Inst<U32>(Opcode::IMul32, a, b);
1142}
1143
1144U32U64 IREmitter::INeg(const U32U64& value) {
1145 switch (value.Type()) {
1146 case Type::U32:
1147 return Inst<U32>(Opcode::INeg32, value);
1148 case Type::U64:
1149 return Inst<U64>(Opcode::INeg64, value);
1150 default:
1151 ThrowInvalidType(value.Type());
1152 }
1153}
1154
1155U32 IREmitter::IAbs(const U32& value) {
1156 return Inst<U32>(Opcode::IAbs32, value);
1157}
1158
1159U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
1160 switch (base.Type()) {
1161 case Type::U32:
1162 return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
1163 case Type::U64:
1164 return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift);
1165 default:
1166 ThrowInvalidType(base.Type());
1167 }
1168}
1169
1170U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
1171 switch (base.Type()) {
1172 case Type::U32:
1173 return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
1174 case Type::U64:
1175 return Inst<U64>(Opcode::ShiftRightLogical64, base, shift);
1176 default:
1177 ThrowInvalidType(base.Type());
1178 }
1179}
1180
1181U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
1182 switch (base.Type()) {
1183 case Type::U32:
1184 return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
1185 case Type::U64:
1186 return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift);
1187 default:
1188 ThrowInvalidType(base.Type());
1189 }
1190}
1191
1192U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
1193 return Inst<U32>(Opcode::BitwiseAnd32, a, b);
1194}
1195
1196U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
1197 return Inst<U32>(Opcode::BitwiseOr32, a, b);
1198}
1199
1200U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
1201 return Inst<U32>(Opcode::BitwiseXor32, a, b);
1202}
1203
1204U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
1205 const U32& count) {
1206 return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
1207}
1208
1209U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
1210 bool is_signed) {
1211 return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
1212 count);
1213}
1214
1215U32 IREmitter::BitReverse(const U32& value) {
1216 return Inst<U32>(Opcode::BitReverse32, value);
1217}
1218
1219U32 IREmitter::BitCount(const U32& value) {
1220 return Inst<U32>(Opcode::BitCount32, value);
1221}
1222
1223U32 IREmitter::BitwiseNot(const U32& value) {
1224 return Inst<U32>(Opcode::BitwiseNot32, value);
1225}
1226
1227U32 IREmitter::FindSMsb(const U32& value) {
1228 return Inst<U32>(Opcode::FindSMsb32, value);
1229}
1230
1231U32 IREmitter::FindUMsb(const U32& value) {
1232 return Inst<U32>(Opcode::FindUMsb32, value);
1233}
1234
1235U32 IREmitter::SMin(const U32& a, const U32& b) {
1236 return Inst<U32>(Opcode::SMin32, a, b);
1237}
1238
1239U32 IREmitter::UMin(const U32& a, const U32& b) {
1240 return Inst<U32>(Opcode::UMin32, a, b);
1241}
1242
1243U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) {
1244 return is_signed ? SMin(a, b) : UMin(a, b);
1245}
1246
1247U32 IREmitter::SMax(const U32& a, const U32& b) {
1248 return Inst<U32>(Opcode::SMax32, a, b);
1249}
1250
1251U32 IREmitter::UMax(const U32& a, const U32& b) {
1252 return Inst<U32>(Opcode::UMax32, a, b);
1253}
1254
1255U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
1256 return is_signed ? SMax(a, b) : UMax(a, b);
1257}
1258
1259U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) {
1260 return Inst<U32>(Opcode::SClamp32, value, min, max);
1261}
1262
1263U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
1264 return Inst<U32>(Opcode::UClamp32, value, min, max);
1265}
1266
1267U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
1268 return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
1269}
1270
1271U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
1272 if (lhs.Type() != rhs.Type()) {
1273 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1274 }
1275 switch (lhs.Type()) {
1276 case Type::U32:
1277 return Inst<U1>(Opcode::IEqual, lhs, rhs);
1278 case Type::U64: {
1279 // Manually compare the unpacked values
1280 const Value lhs_vector{UnpackUint2x32(lhs)};
1281 const Value rhs_vector{UnpackUint2x32(rhs)};
1282 return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
1283 IR::U32{CompositeExtract(rhs_vector, 0)}),
1284 IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
1285 IR::U32{CompositeExtract(rhs_vector, 1)}));
1286 }
1287 default:
1288 ThrowInvalidType(lhs.Type());
1289 }
1290}
1291
1292U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
1293 return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
1294}
1295
1296U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
1297 return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
1298}
1299
1300U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
1301 return Inst<U1>(Opcode::INotEqual, lhs, rhs);
1302}
1303
1304U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
1305 return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
1306}
1307
1308U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
1309 return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
1310}
1311
1312U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
1313 return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
1314}
1315
1316U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
1317 return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
1318}
1319
1320U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
1321 return is_signed ? SharedAtomicSMin(pointer_offset, value)
1322 : SharedAtomicUMin(pointer_offset, value);
1323}
1324
1325U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
1326 return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
1327}
1328
1329U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
1330 return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
1331}
1332
1333U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
1334 return is_signed ? SharedAtomicSMax(pointer_offset, value)
1335 : SharedAtomicUMax(pointer_offset, value);
1336}
1337
1338U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
1339 return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
1340}
1341
1342U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
1343 return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
1344}
1345
1346U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
1347 return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
1348}
1349
1350U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
1351 return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
1352}
1353
1354U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
1355 return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
1356}
1357
1358U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
1359 switch (value.Type()) {
1360 case Type::U32:
1361 return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
1362 case Type::U64:
1363 return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
1364 default:
1365        ThrowInvalidType(value.Type());
1366 }
1367}
1368
1369U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
1370 switch (value.Type()) {
1371 case Type::U32:
1372 return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
1373 case Type::U64:
1374 return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
1375 default:
1376 ThrowInvalidType(value.Type());
1377 }
1378}
1379
1380U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
1381 switch (value.Type()) {
1382 case Type::U32:
1383 return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
1384 case Type::U64:
1385 return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
1386 default:
1387 ThrowInvalidType(value.Type());
1388 }
1389}
1390
1391U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
1392 switch (value.Type()) {
1393 case Type::U32:
1394 return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
1395 case Type::U64:
1396 return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
1397 default:
1398 ThrowInvalidType(value.Type());
1399 }
1400}
1401
1402U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1403 return is_signed ? GlobalAtomicSMin(pointer_offset, value)
1404 : GlobalAtomicUMin(pointer_offset, value);
1405}
1406
1407U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
1408 switch (value.Type()) {
1409 case Type::U32:
1410 return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
1411 case Type::U64:
1412 return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
1413 default:
1414 ThrowInvalidType(value.Type());
1415 }
1416}
1417
1418U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
1419 switch (value.Type()) {
1420 case Type::U32:
1421 return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
1422 case Type::U64:
1423 return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
1424 default:
1425 ThrowInvalidType(value.Type());
1426 }
1427}
1428
1429U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1430 return is_signed ? GlobalAtomicSMax(pointer_offset, value)
1431 : GlobalAtomicUMax(pointer_offset, value);
1432}
1433
1434U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
1435 return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
1436}
1437
1438U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
1439 return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
1440}
1441
1442U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
1443 switch (value.Type()) {
1444 case Type::U32:
1445 return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
1446 case Type::U64:
1447 return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
1448 default:
1449 ThrowInvalidType(value.Type());
1450 }
1451}
1452
1453U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
1454 switch (value.Type()) {
1455 case Type::U32:
1456 return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
1457 case Type::U64:
1458 return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
1459 default:
1460 ThrowInvalidType(value.Type());
1461 }
1462}
1463
1464U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
1465 switch (value.Type()) {
1466 case Type::U32:
1467 return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
1468 case Type::U64:
1469 return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
1470 default:
1471 ThrowInvalidType(value.Type());
1472 }
1473}
1474
1475U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
1476 switch (value.Type()) {
1477 case Type::U32:
1478 return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
1479 case Type::U64:
1480 return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
1481 default:
1482        ThrowInvalidType(value.Type());
1483 }
1484}
1485
1486F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
1487 const FpControl control) {
1488 return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
1489}
1490
1491Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
1492 const FpControl control) {
1493 return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
1494}
1495
1496Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
1497 const FpControl control) {
1498 return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
1499}
1500
1501Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
1502 const FpControl control) {
1503 return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
1504}
1505
1506U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
1507 return Inst<U1>(Opcode::LogicalOr, a, b);
1508}
1509
1510U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
1511 return Inst<U1>(Opcode::LogicalAnd, a, b);
1512}
1513
1514U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
1515 return Inst<U1>(Opcode::LogicalXor, a, b);
1516}
1517
1518U1 IREmitter::LogicalNot(const U1& value) {
1519 return Inst<U1>(Opcode::LogicalNot, value);
1520}
1521
1522U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
1523 switch (bitsize) {
1524 case 16:
1525 switch (value.Type()) {
1526 case Type::F16:
1527 return Inst<U32>(Opcode::ConvertS16F16, value);
1528 case Type::F32:
1529 return Inst<U32>(Opcode::ConvertS16F32, value);
1530 case Type::F64:
1531 return Inst<U32>(Opcode::ConvertS16F64, value);
1532 default:
1533 ThrowInvalidType(value.Type());
1534 }
1535 case 32:
1536 switch (value.Type()) {
1537 case Type::F16:
1538 return Inst<U32>(Opcode::ConvertS32F16, value);
1539 case Type::F32:
1540 return Inst<U32>(Opcode::ConvertS32F32, value);
1541 case Type::F64:
1542 return Inst<U32>(Opcode::ConvertS32F64, value);
1543 default:
1544 ThrowInvalidType(value.Type());
1545 }
1546 case 64:
1547 switch (value.Type()) {
1548 case Type::F16:
1549 return Inst<U64>(Opcode::ConvertS64F16, value);
1550 case Type::F32:
1551 return Inst<U64>(Opcode::ConvertS64F32, value);
1552 case Type::F64:
1553 return Inst<U64>(Opcode::ConvertS64F64, value);
1554 default:
1555 ThrowInvalidType(value.Type());
1556 }
1557 default:
1558 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1559 }
1560}
1561
1562U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
1563 switch (bitsize) {
1564 case 16:
1565 switch (value.Type()) {
1566 case Type::F16:
1567 return Inst<U32>(Opcode::ConvertU16F16, value);
1568 case Type::F32:
1569 return Inst<U32>(Opcode::ConvertU16F32, value);
1570 case Type::F64:
1571 return Inst<U32>(Opcode::ConvertU16F64, value);
1572 default:
1573 ThrowInvalidType(value.Type());
1574 }
1575 case 32:
1576 switch (value.Type()) {
1577 case Type::F16:
1578 return Inst<U32>(Opcode::ConvertU32F16, value);
1579 case Type::F32:
1580 return Inst<U32>(Opcode::ConvertU32F32, value);
1581 case Type::F64:
1582 return Inst<U32>(Opcode::ConvertU32F64, value);
1583 default:
1584 ThrowInvalidType(value.Type());
1585 }
1586 case 64:
1587 switch (value.Type()) {
1588 case Type::F16:
1589 return Inst<U64>(Opcode::ConvertU64F16, value);
1590 case Type::F32:
1591 return Inst<U64>(Opcode::ConvertU64F32, value);
1592 case Type::F64:
1593 return Inst<U64>(Opcode::ConvertU64F64, value);
1594 default:
1595 ThrowInvalidType(value.Type());
1596 }
1597 default:
1598 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1599 }
1600}
1601
1602U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
1603 return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value);
1604}
1605
1606F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
1607 FpControl control) {
1608 switch (dest_bitsize) {
1609 case 16:
1610 switch (src_bitsize) {
1611 case 8:
1612 return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value);
1613 case 16:
1614 return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value);
1615 case 32:
1616 return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value);
1617 case 64:
1618 return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value);
1619 }
1620 break;
1621 case 32:
1622 switch (src_bitsize) {
1623 case 8:
1624 return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value);
1625 case 16:
1626 return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value);
1627 case 32:
1628 return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value);
1629 case 64:
1630 return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value);
1631 }
1632 break;
1633 case 64:
1634 switch (src_bitsize) {
1635 case 8:
1636 return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value);
1637 case 16:
1638 return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value);
1639 case 32:
1640 return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value);
1641 case 64:
1642 return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value);
1643 }
1644 break;
1645 }
1646 throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
1647}
1648
1649F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
1650 FpControl control) {
1651 switch (dest_bitsize) {
1652 case 16:
1653 switch (src_bitsize) {
1654 case 8:
1655 return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value);
1656 case 16:
1657 return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value);
1658 case 32:
1659 return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value);
1660 case 64:
1661 return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value);
1662 }
1663 break;
1664 case 32:
1665 switch (src_bitsize) {
1666 case 8:
1667 return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value);
1668 case 16:
1669 return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value);
1670 case 32:
1671 return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value);
1672 case 64:
1673 return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value);
1674 }
1675 break;
1676 case 64:
1677 switch (src_bitsize) {
1678 case 8:
1679 return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value);
1680 case 16:
1681 return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value);
1682 case 32:
1683 return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value);
1684 case 64:
1685 return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value);
1686 }
1687 break;
1688 }
1689 throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
1690}
1691
1692F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
1693 const Value& value, FpControl control) {
1694 return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control)
1695 : ConvertUToF(dest_bitsize, src_bitsize, value, control);
1696}
1697
1698U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
1699 switch (result_bitsize) {
1700 case 32:
1701 switch (value.Type()) {
1702 case Type::U32:
1703 // Nothing to do
1704 return value;
1705 case Type::U64:
1706 return Inst<U32>(Opcode::ConvertU32U64, value);
1707 default:
1708 break;
1709 }
1710 break;
1711 case 64:
1712 switch (value.Type()) {
1713 case Type::U32:
1714 return Inst<U64>(Opcode::ConvertU64U32, value);
1715 case Type::U64:
1716 // Nothing to do
1717 return value;
1718 default:
1719 break;
1720 }
1721 }
1722 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1723}
1724
1725F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) {
1726 switch (result_bitsize) {
1727 case 16:
1728 switch (value.Type()) {
1729 case Type::F16:
1730 // Nothing to do
1731 return value;
1732 case Type::F32:
1733 return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value);
1734 case Type::F64:
1735 throw LogicError("Illegal conversion from F64 to F16");
1736 default:
1737 break;
1738 }
1739 break;
1740 case 32:
1741 switch (value.Type()) {
1742 case Type::F16:
1743 return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value);
1744 case Type::F32:
1745 // Nothing to do
1746 return value;
1747 case Type::F64:
1748 return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value);
1749 default:
1750 break;
1751 }
1752 break;
1753 case 64:
1754 switch (value.Type()) {
1755 case Type::F16:
1756 throw LogicError("Illegal conversion from F16 to F64");
1757 case Type::F32:
1758 return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value);
1759 case Type::F64:
1760 // Nothing to do
1761 return value;
1762 default:
1763 break;
1764 }
1765 break;
1766 }
1767 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1768}
1769
1770Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
1771 const Value& offset, const F32& lod_clamp,
1772 TextureInstInfo info) {
1773 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1774 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
1775 : Opcode::BindlessImageSampleImplicitLod};
1776 return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
1777}
1778
1779Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
1780 const Value& offset, TextureInstInfo info) {
1781 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
1782 : Opcode::BindlessImageSampleExplicitLod};
1783 return Inst(op, Flags{info}, handle, coords, lod, offset);
1784}
1785
1786F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
1787 const F32& bias, const Value& offset,
1788 const F32& lod_clamp, TextureInstInfo info) {
1789 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1790 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
1791 : Opcode::BindlessImageSampleDrefImplicitLod};
1792 return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
1793}
1794
1795F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
1796 const F32& lod, const Value& offset,
1797 TextureInstInfo info) {
1798 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
1799 : Opcode::BindlessImageSampleDrefExplicitLod};
1800 return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset);
1801}
1802
1803Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
1804 const Value& offset2, TextureInstInfo info) {
1805 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather};
1806 return Inst(op, Flags{info}, handle, coords, offset, offset2);
1807}
1808
1809Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
1810 const Value& offset2, const F32& dref, TextureInstInfo info) {
1811 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref
1812 : Opcode::BindlessImageGatherDref};
1813 return Inst(op, Flags{info}, handle, coords, offset, offset2, dref);
1814}
1815
1816Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
1817 const U32& lod, const U32& multisampling, TextureInstInfo info) {
1818 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch};
1819 return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
1820}
1821
1822Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
1823 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
1824 : Opcode::BindlessImageQueryDimensions};
1825 return Inst(op, handle, lod);
1826}
1827
1828Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
1829 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod
1830 : Opcode::BindlessImageQueryLod};
1831 return Inst(op, Flags{info}, handle, coords);
1832}
1833
1834Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
1835 const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
1836 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
1837 : Opcode::BindlessImageGradient};
1838 return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
1839}
1840
1841Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
1842 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead};
1843 return Inst(op, Flags{info}, handle, coords);
1844}
1845
1846void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
1847 TextureInstInfo info) {
1848 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
1849 Inst(op, Flags{info}, handle, coords, color);
1850}
1851
1852Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
1853 TextureInstInfo info) {
1854 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32
1855 : Opcode::BindlessImageAtomicIAdd32};
1856 return Inst(op, Flags{info}, handle, coords, value);
1857}
1858
1859Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
1860 TextureInstInfo info) {
1861 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32
1862 : Opcode::BindlessImageAtomicSMin32};
1863 return Inst(op, Flags{info}, handle, coords, value);
1864}
1865
1866Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
1867 TextureInstInfo info) {
1868 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32
1869 : Opcode::BindlessImageAtomicUMin32};
1870 return Inst(op, Flags{info}, handle, coords, value);
1871}
1872
1873Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
1874 bool is_signed, TextureInstInfo info) {
1875 return is_signed ? ImageAtomicSMin(handle, coords, value, info)
1876 : ImageAtomicUMin(handle, coords, value, info);
1877}
1878
1879Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
1880 TextureInstInfo info) {
1881 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32
1882 : Opcode::BindlessImageAtomicSMax32};
1883 return Inst(op, Flags{info}, handle, coords, value);
1884}
1885
1886Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
1887 TextureInstInfo info) {
1888 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32
1889 : Opcode::BindlessImageAtomicUMax32};
1890 return Inst(op, Flags{info}, handle, coords, value);
1891}
1892
1893Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
1894 bool is_signed, TextureInstInfo info) {
1895 return is_signed ? ImageAtomicSMax(handle, coords, value, info)
1896 : ImageAtomicUMax(handle, coords, value, info);
1897}
1898
1899Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
1900 TextureInstInfo info) {
1901 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32
1902 : Opcode::BindlessImageAtomicInc32};
1903 return Inst(op, Flags{info}, handle, coords, value);
1904}
1905
1906Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
1907 TextureInstInfo info) {
1908 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32
1909 : Opcode::BindlessImageAtomicDec32};
1910 return Inst(op, Flags{info}, handle, coords, value);
1911}
1912
1913Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
1914 TextureInstInfo info) {
1915 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32
1916 : Opcode::BindlessImageAtomicAnd32};
1917 return Inst(op, Flags{info}, handle, coords, value);
1918}
1919
1920Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
1921 TextureInstInfo info) {
1922 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32
1923 : Opcode::BindlessImageAtomicOr32};
1924 return Inst(op, Flags{info}, handle, coords, value);
1925}
1926
1927Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
1928 TextureInstInfo info) {
1929 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32
1930 : Opcode::BindlessImageAtomicXor32};
1931 return Inst(op, Flags{info}, handle, coords, value);
1932}
1933
1934Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
1935 TextureInstInfo info) {
1936 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32
1937 : Opcode::BindlessImageAtomicExchange32};
1938 return Inst(op, Flags{info}, handle, coords, value);
1939}
1940
1941U1 IREmitter::VoteAll(const U1& value) {
1942 return Inst<U1>(Opcode::VoteAll, value);
1943}
1944
1945U1 IREmitter::VoteAny(const U1& value) {
1946 return Inst<U1>(Opcode::VoteAny, value);
1947}
1948
1949U1 IREmitter::VoteEqual(const U1& value) {
1950 return Inst<U1>(Opcode::VoteEqual, value);
1951}
1952
1953U32 IREmitter::SubgroupBallot(const U1& value) {
1954 return Inst<U32>(Opcode::SubgroupBallot, value);
1955}
1956
1957U32 IREmitter::SubgroupEqMask() {
1958 return Inst<U32>(Opcode::SubgroupEqMask);
1959}
1960
1961U32 IREmitter::SubgroupLtMask() {
1962 return Inst<U32>(Opcode::SubgroupLtMask);
1963}
1964
1965U32 IREmitter::SubgroupLeMask() {
1966 return Inst<U32>(Opcode::SubgroupLeMask);
1967}
1968
1969U32 IREmitter::SubgroupGtMask() {
1970 return Inst<U32>(Opcode::SubgroupGtMask);
1971}
1972
1973U32 IREmitter::SubgroupGeMask() {
1974 return Inst<U32>(Opcode::SubgroupGeMask);
1975}
1976
1977U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1978 const IR::U32& seg_mask) {
1979 return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
1980}
1981
1982U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1983 const IR::U32& seg_mask) {
1984 return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask);
1985}
1986
1987U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1988 const IR::U32& seg_mask) {
1989 return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask);
1990}
1991
1992U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1993 const IR::U32& seg_mask) {
1994 return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
1995}
1996
1997F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
1998 return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle);
1999}
2000
2001F32 IREmitter::DPdxFine(const F32& a) {
2002 return Inst<F32>(Opcode::DPdxFine, a);
2003}
2004
2005F32 IREmitter::DPdyFine(const F32& a) {
2006 return Inst<F32>(Opcode::DPdyFine, a);
2007}
2008
2009F32 IREmitter::DPdxCoarse(const F32& a) {
2010 return Inst<F32>(Opcode::DPdxCoarse, a);
2011}
2012
2013F32 IREmitter::DPdyCoarse(const F32& a) {
2014 return Inst<F32>(Opcode::DPdyCoarse, a);
2015}
2016
2017} // namespace Shader::IR
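Every typed helper in this emitter follows the same dispatch shape: check that both operands carry the same IR::Type, then pick the width-specific opcode in a switch and throw on anything else. The following is a minimal standalone C++ sketch of that shape only; DemoType, DemoOpcode and SelectLessThan are illustrative names and are not part of the shader recompiler.

#include <cstdio>
#include <stdexcept>

enum class DemoType { F16, F32, F64 };
enum class DemoOpcode { FPOrdLessThan16, FPOrdLessThan32, FPOrdLessThan64 };

// Validate that both operands share a type, then select the width-specific opcode,
// mirroring the switch-per-type pattern used by the FPLessThan-style helpers above.
DemoOpcode SelectLessThan(DemoType lhs, DemoType rhs) {
    if (lhs != rhs) {
        throw std::invalid_argument("mismatching operand types");
    }
    switch (lhs) {
    case DemoType::F16:
        return DemoOpcode::FPOrdLessThan16;
    case DemoType::F32:
        return DemoOpcode::FPOrdLessThan32;
    case DemoType::F64:
        return DemoOpcode::FPOrdLessThan64;
    }
    throw std::invalid_argument("invalid floating-point type");
}

int main() {
    const DemoOpcode op{SelectLessThan(DemoType::F32, DemoType::F32)};
    std::printf("selected opcode index: %d\n", static_cast<int>(op));
    return 0;
}

Running this prints the index of FPOrdLessThan32; the real emitter instead forwards the selected opcode, the packed FpControl flags and both operands to Inst<U1>.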
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
new file mode 100644
index 000000000..53f7b3b06
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -0,0 +1,413 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstring>
8#include <type_traits>
9
10#include "shader_recompiler/frontend/ir/attribute.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/modifiers.h"
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17class IREmitter {
18public:
19 explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
20 explicit IREmitter(Block& block_, Block::iterator insertion_point_)
21 : block{&block_}, insertion_point{insertion_point_} {}
22
23 Block* block;
24
25 [[nodiscard]] U1 Imm1(bool value) const;
26 [[nodiscard]] U8 Imm8(u8 value) const;
27 [[nodiscard]] U16 Imm16(u16 value) const;
28 [[nodiscard]] U32 Imm32(u32 value) const;
29 [[nodiscard]] U32 Imm32(s32 value) const;
30 [[nodiscard]] F32 Imm32(f32 value) const;
31 [[nodiscard]] U64 Imm64(u64 value) const;
32 [[nodiscard]] U64 Imm64(s64 value) const;
33 [[nodiscard]] F64 Imm64(f64 value) const;
34
35 U1 ConditionRef(const U1& value);
36 void Reference(const Value& value);
37
38 void PhiMove(IR::Inst& phi, const Value& value);
39
40 void Prologue();
41 void Epilogue();
42 void DemoteToHelperInvocation();
43 void EmitVertex(const U32& stream);
44 void EndPrimitive(const U32& stream);
45
46 [[nodiscard]] U32 GetReg(IR::Reg reg);
47 void SetReg(IR::Reg reg, const U32& value);
48
49 [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
50 void SetPred(IR::Pred pred, const U1& value);
51
52 [[nodiscard]] U1 GetGotoVariable(u32 id);
53 void SetGotoVariable(u32 id, const U1& value);
54
55 [[nodiscard]] U32 GetIndirectBranchVariable();
56 void SetIndirectBranchVariable(const U32& value);
57
58 [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
59 [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
60 bool is_signed);
61 [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);
62
63 [[nodiscard]] U1 GetZFlag();
64 [[nodiscard]] U1 GetSFlag();
65 [[nodiscard]] U1 GetCFlag();
66 [[nodiscard]] U1 GetOFlag();
67
68 void SetZFlag(const U1& value);
69 void SetSFlag(const U1& value);
70 void SetCFlag(const U1& value);
71 void SetOFlag(const U1& value);
72
73 [[nodiscard]] U1 Condition(IR::Condition cond);
74 [[nodiscard]] U1 GetFlowTestResult(FlowTest test);
75
76 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
77 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
78 void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
79
80 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
81 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
82 void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
83
84 [[nodiscard]] F32 GetPatch(Patch patch);
85 void SetPatch(Patch patch, const F32& value);
86
87 void SetFragColor(u32 index, u32 component, const F32& value);
88 void SetSampleMask(const U32& value);
89 void SetFragDepth(const F32& value);
90
91 [[nodiscard]] U32 WorkgroupIdX();
92 [[nodiscard]] U32 WorkgroupIdY();
93 [[nodiscard]] U32 WorkgroupIdZ();
94
95 [[nodiscard]] Value LocalInvocationId();
96 [[nodiscard]] U32 LocalInvocationIdX();
97 [[nodiscard]] U32 LocalInvocationIdY();
98 [[nodiscard]] U32 LocalInvocationIdZ();
99
100 [[nodiscard]] U32 InvocationId();
101 [[nodiscard]] U32 SampleId();
102 [[nodiscard]] U1 IsHelperInvocation();
103 [[nodiscard]] F32 YDirection();
104
105 [[nodiscard]] U32 LaneId();
106
107 [[nodiscard]] U32 LoadGlobalU8(const U64& address);
108 [[nodiscard]] U32 LoadGlobalS8(const U64& address);
109 [[nodiscard]] U32 LoadGlobalU16(const U64& address);
110 [[nodiscard]] U32 LoadGlobalS16(const U64& address);
111 [[nodiscard]] U32 LoadGlobal32(const U64& address);
112 [[nodiscard]] Value LoadGlobal64(const U64& address);
113 [[nodiscard]] Value LoadGlobal128(const U64& address);
114
115 void WriteGlobalU8(const U64& address, const U32& value);
116 void WriteGlobalS8(const U64& address, const U32& value);
117 void WriteGlobalU16(const U64& address, const U32& value);
118 void WriteGlobalS16(const U64& address, const U32& value);
119 void WriteGlobal32(const U64& address, const U32& value);
120 void WriteGlobal64(const U64& address, const IR::Value& vector);
121 void WriteGlobal128(const U64& address, const IR::Value& vector);
122
123 [[nodiscard]] U32 LoadLocal(const U32& word_offset);
124 void WriteLocal(const U32& word_offset, const U32& value);
125
126 [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
127 void WriteShared(int bit_size, const U32& offset, const Value& value);
128
129 [[nodiscard]] U1 GetZeroFromOp(const Value& op);
130 [[nodiscard]] U1 GetSignFromOp(const Value& op);
131 [[nodiscard]] U1 GetCarryFromOp(const Value& op);
132 [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
133 [[nodiscard]] U1 GetSparseFromOp(const Value& op);
134 [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
135
136 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
137 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
138 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
139 const Value& e4);
140 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
141 [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
142
143 [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
144 const Value& false_value);
145
146 void Barrier();
147 void WorkgroupMemoryBarrier();
148 void DeviceMemoryBarrier();
149
150 template <typename Dest, typename Source>
151 [[nodiscard]] Dest BitCast(const Source& value);
152
153 [[nodiscard]] U64 PackUint2x32(const Value& vector);
154 [[nodiscard]] Value UnpackUint2x32(const U64& value);
155
156 [[nodiscard]] U32 PackFloat2x16(const Value& vector);
157 [[nodiscard]] Value UnpackFloat2x16(const U32& value);
158
159 [[nodiscard]] U32 PackHalf2x16(const Value& vector);
160 [[nodiscard]] Value UnpackHalf2x16(const U32& value);
161
162 [[nodiscard]] F64 PackDouble2x32(const Value& vector);
163 [[nodiscard]] Value UnpackDouble2x32(const F64& value);
164
165 [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
166 [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
167 [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
168 FpControl control = {});
169
170 [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
171 [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
172 [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
173
174 [[nodiscard]] F32 FPCos(const F32& value);
175 [[nodiscard]] F32 FPSin(const F32& value);
176 [[nodiscard]] F32 FPExp2(const F32& value);
177 [[nodiscard]] F32 FPLog2(const F32& value);
178 [[nodiscard]] F32F64 FPRecip(const F32F64& value);
179 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
180 [[nodiscard]] F32 FPSqrt(const F32& value);
181 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
182 [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
183 const F16F32F64& max_value);
184 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
185 [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
186 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
187 [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
188
189 [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
190 bool ordered = true);
191 [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
192 bool ordered = true);
193 [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
194 bool ordered = true);
195 [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
196 FpControl control = {}, bool ordered = true);
197 [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
198 FpControl control = {}, bool ordered = true);
199 [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
200 FpControl control = {}, bool ordered = true);
201 [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
202 [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
203 [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
204 [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
205 [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
206
207 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
208 [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
209 [[nodiscard]] U32 IMul(const U32& a, const U32& b);
210 [[nodiscard]] U32U64 INeg(const U32U64& value);
211 [[nodiscard]] U32 IAbs(const U32& value);
212 [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
213 [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
214 [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
215 [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
216 [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
217 [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
218 [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
219 const U32& count);
220 [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
221 bool is_signed = false);
222 [[nodiscard]] U32 BitReverse(const U32& value);
223 [[nodiscard]] U32 BitCount(const U32& value);
224 [[nodiscard]] U32 BitwiseNot(const U32& value);
225
226 [[nodiscard]] U32 FindSMsb(const U32& value);
227 [[nodiscard]] U32 FindUMsb(const U32& value);
228 [[nodiscard]] U32 SMin(const U32& a, const U32& b);
229 [[nodiscard]] U32 UMin(const U32& a, const U32& b);
230 [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
231 [[nodiscard]] U32 SMax(const U32& a, const U32& b);
232 [[nodiscard]] U32 UMax(const U32& a, const U32& b);
233 [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
234 [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
235 [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
236
237 [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
238 [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
239 [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
240 [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
241 [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
242 [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
243
244 [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
245 [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
246 [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
247 [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
248 [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
249 [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
250 [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
251 [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
252 [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
253 [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
254 [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
255 [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
256 [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
257
258 [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
259 [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
260 [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
261 [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
262 bool is_signed);
263 [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
264 [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
265 [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
266 bool is_signed);
267 [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
268 [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
269 [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
270 [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
271 [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
272 [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
273
274 [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
275 const FpControl control = {});
276 [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
277 const FpControl control = {});
278 [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
279 const FpControl control = {});
280 [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
281 const FpControl control = {});
282
283 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
284 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
285 [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
286 [[nodiscard]] U1 LogicalNot(const U1& value);
287
288 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
289 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
290 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
291 [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
292 FpControl control = {});
293 [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
294 FpControl control = {});
295 [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
296 const Value& value, FpControl control = {});
297
298 [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
299 [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
300 FpControl control = {});
301
302 [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
303 const F32& bias, const Value& offset,
304 const F32& lod_clamp, TextureInstInfo info);
305 [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
306 const F32& lod, const Value& offset,
307 TextureInstInfo info);
308 [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
309 const F32& dref, const F32& bias,
310 const Value& offset, const F32& lod_clamp,
311 TextureInstInfo info);
312 [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
313 const F32& dref, const F32& lod,
314 const Value& offset, TextureInstInfo info);
315 [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
316
317 [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
318 TextureInstInfo info);
319 [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
320 const Value& offset2, TextureInstInfo info);
321 [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
322 const Value& offset, const Value& offset2, const F32& dref,
323 TextureInstInfo info);
324 [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
325 const U32& lod, const U32& multisampling, TextureInstInfo info);
326 [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
327 const Value& derivates, const Value& offset,
328 const F32& lod_clamp, TextureInstInfo info);
329 [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
330    void ImageWrite(const Value& handle, const Value& coords, const Value& color,
331                    TextureInstInfo info);
332
333 [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
334 const Value& value, TextureInstInfo info);
335 [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
336 const Value& value, TextureInstInfo info);
337 [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
338 const Value& value, TextureInstInfo info);
339 [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
340 const Value& value, bool is_signed, TextureInstInfo info);
341 [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
342 const Value& value, TextureInstInfo info);
343 [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
344 const Value& value, TextureInstInfo info);
345 [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
346 const Value& value, bool is_signed, TextureInstInfo info);
347 [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
348 TextureInstInfo info);
349 [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
350 TextureInstInfo info);
351 [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
352 TextureInstInfo info);
353 [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
354 TextureInstInfo info);
355 [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
356 TextureInstInfo info);
357 [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
358 const Value& value, TextureInstInfo info);
359 [[nodiscard]] U1 VoteAll(const U1& value);
360 [[nodiscard]] U1 VoteAny(const U1& value);
361 [[nodiscard]] U1 VoteEqual(const U1& value);
362 [[nodiscard]] U32 SubgroupBallot(const U1& value);
363 [[nodiscard]] U32 SubgroupEqMask();
364 [[nodiscard]] U32 SubgroupLtMask();
365 [[nodiscard]] U32 SubgroupLeMask();
366 [[nodiscard]] U32 SubgroupGtMask();
367 [[nodiscard]] U32 SubgroupGeMask();
368 [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
369 const IR::U32& seg_mask);
370 [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
371 const IR::U32& seg_mask);
372 [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
373 const IR::U32& seg_mask);
374 [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
375 const IR::U32& clamp, const IR::U32& seg_mask);
376 [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
377 FpControl control = {});
378
379 [[nodiscard]] F32 DPdxFine(const F32& a);
380
381 [[nodiscard]] F32 DPdyFine(const F32& a);
382
383 [[nodiscard]] F32 DPdxCoarse(const F32& a);
384
385 [[nodiscard]] F32 DPdyCoarse(const F32& a);
386
387private:
388 IR::Block::iterator insertion_point;
389
390 template <typename T = Value, typename... Args>
391 T Inst(Opcode op, Args... args) {
392 auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
393 return T{Value{&*it}};
394 }
395
396 template <typename T>
397 requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
398 Flags() = default;
399 Flags(T proxy_) : proxy{proxy_} {}
400
401 T proxy;
402 };
403
404 template <typename T = Value, typename FlagType, typename... Args>
405 T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
406 u32 raw_flags{};
407 std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
408 auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
409 return T{Value{&*it}};
410 }
411};
412
413} // namespace Shader::IR
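The private Inst overload that takes Flags<T> accepts any trivially copyable struct of at most 32 bits and copies it bit-for-bit into the instruction's raw u32 flags with std::memcpy. Below is a self-contained sketch of that packing step, assuming a hypothetical DemoControl struct in place of the real FpControl or TextureInstInfo layouts.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <type_traits>

// Stand-in flag struct; like FpControl it must fit in a u32 and be trivially copyable.
struct DemoControl {
    std::uint8_t rounding{0};
    bool no_contraction{false};
    bool fmz{false};
};
static_assert(sizeof(DemoControl) <= sizeof(std::uint32_t));
static_assert(std::is_trivially_copyable_v<DemoControl>);

// Zero-initialize the destination so unused bytes are deterministic, then copy the
// struct's object representation into it, as the emitter does before creating the Inst.
std::uint32_t PackFlags(const DemoControl& control) {
    std::uint32_t raw_flags{};
    std::memcpy(&raw_flags, &control, sizeof(control));
    return raw_flags;
}

int main() {
    const DemoControl control{.rounding = 2, .no_contraction = true, .fmz = false};
    std::printf("raw flags = 0x%08x\n", static_cast<unsigned>(PackFlags(control)));
    return 0;
}

The memcpy route avoids reinterpret_cast and union type punning, which keeps the packing well defined for every flag struct that satisfies the requires clause.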
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 000000000..3dfa5a880
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,411 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/type.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::IR {
13namespace {
14void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
15 if (inst && inst->GetOpcode() != opcode) {
16 throw LogicError("Invalid pseudo-instruction");
17 }
18}
19
20void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
21 if (dest_inst) {
22 throw LogicError("Only one of each type of pseudo-op allowed");
23 }
24 dest_inst = pseudo_inst;
25}
26
27void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
28 if (inst->GetOpcode() != expected_opcode) {
29 throw LogicError("Undoing use of invalid pseudo-op");
30 }
31 inst = nullptr;
32}
33
34void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
35 if (!associated_insts) {
36 associated_insts = std::make_unique<AssociatedInsts>();
37 }
38}
39} // Anonymous namespace
40
41Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
42 if (op == Opcode::Phi) {
43 std::construct_at(&phi_args);
44 } else {
45 std::construct_at(&args);
46 }
47}
48
49Inst::~Inst() {
50 if (op == Opcode::Phi) {
51 std::destroy_at(&phi_args);
52 } else {
53 std::destroy_at(&args);
54 }
55}
56
57bool Inst::MayHaveSideEffects() const noexcept {
58 switch (op) {
59 case Opcode::ConditionRef:
60 case Opcode::Reference:
61 case Opcode::PhiMove:
62 case Opcode::Prologue:
63 case Opcode::Epilogue:
64 case Opcode::Join:
65 case Opcode::DemoteToHelperInvocation:
66 case Opcode::Barrier:
67 case Opcode::WorkgroupMemoryBarrier:
68 case Opcode::DeviceMemoryBarrier:
69 case Opcode::EmitVertex:
70 case Opcode::EndPrimitive:
71 case Opcode::SetAttribute:
72 case Opcode::SetAttributeIndexed:
73 case Opcode::SetPatch:
74 case Opcode::SetFragColor:
75 case Opcode::SetSampleMask:
76 case Opcode::SetFragDepth:
77 case Opcode::WriteGlobalU8:
78 case Opcode::WriteGlobalS8:
79 case Opcode::WriteGlobalU16:
80 case Opcode::WriteGlobalS16:
81 case Opcode::WriteGlobal32:
82 case Opcode::WriteGlobal64:
83 case Opcode::WriteGlobal128:
84 case Opcode::WriteStorageU8:
85 case Opcode::WriteStorageS8:
86 case Opcode::WriteStorageU16:
87 case Opcode::WriteStorageS16:
88 case Opcode::WriteStorage32:
89 case Opcode::WriteStorage64:
90 case Opcode::WriteStorage128:
91 case Opcode::WriteLocal:
92 case Opcode::WriteSharedU8:
93 case Opcode::WriteSharedU16:
94 case Opcode::WriteSharedU32:
95 case Opcode::WriteSharedU64:
96 case Opcode::WriteSharedU128:
97 case Opcode::SharedAtomicIAdd32:
98 case Opcode::SharedAtomicSMin32:
99 case Opcode::SharedAtomicUMin32:
100 case Opcode::SharedAtomicSMax32:
101 case Opcode::SharedAtomicUMax32:
102 case Opcode::SharedAtomicInc32:
103 case Opcode::SharedAtomicDec32:
104 case Opcode::SharedAtomicAnd32:
105 case Opcode::SharedAtomicOr32:
106 case Opcode::SharedAtomicXor32:
107 case Opcode::SharedAtomicExchange32:
108 case Opcode::SharedAtomicExchange64:
109 case Opcode::GlobalAtomicIAdd32:
110 case Opcode::GlobalAtomicSMin32:
111 case Opcode::GlobalAtomicUMin32:
112 case Opcode::GlobalAtomicSMax32:
113 case Opcode::GlobalAtomicUMax32:
114 case Opcode::GlobalAtomicInc32:
115 case Opcode::GlobalAtomicDec32:
116 case Opcode::GlobalAtomicAnd32:
117 case Opcode::GlobalAtomicOr32:
118 case Opcode::GlobalAtomicXor32:
119 case Opcode::GlobalAtomicExchange32:
120 case Opcode::GlobalAtomicIAdd64:
121 case Opcode::GlobalAtomicSMin64:
122 case Opcode::GlobalAtomicUMin64:
123 case Opcode::GlobalAtomicSMax64:
124 case Opcode::GlobalAtomicUMax64:
125 case Opcode::GlobalAtomicAnd64:
126 case Opcode::GlobalAtomicOr64:
127 case Opcode::GlobalAtomicXor64:
128 case Opcode::GlobalAtomicExchange64:
129 case Opcode::GlobalAtomicAddF32:
130 case Opcode::GlobalAtomicAddF16x2:
131 case Opcode::GlobalAtomicAddF32x2:
132 case Opcode::GlobalAtomicMinF16x2:
133 case Opcode::GlobalAtomicMinF32x2:
134 case Opcode::GlobalAtomicMaxF16x2:
135 case Opcode::GlobalAtomicMaxF32x2:
136 case Opcode::StorageAtomicIAdd32:
137 case Opcode::StorageAtomicSMin32:
138 case Opcode::StorageAtomicUMin32:
139 case Opcode::StorageAtomicSMax32:
140 case Opcode::StorageAtomicUMax32:
141 case Opcode::StorageAtomicInc32:
142 case Opcode::StorageAtomicDec32:
143 case Opcode::StorageAtomicAnd32:
144 case Opcode::StorageAtomicOr32:
145 case Opcode::StorageAtomicXor32:
146 case Opcode::StorageAtomicExchange32:
147 case Opcode::StorageAtomicIAdd64:
148 case Opcode::StorageAtomicSMin64:
149 case Opcode::StorageAtomicUMin64:
150 case Opcode::StorageAtomicSMax64:
151 case Opcode::StorageAtomicUMax64:
152 case Opcode::StorageAtomicAnd64:
153 case Opcode::StorageAtomicOr64:
154 case Opcode::StorageAtomicXor64:
155 case Opcode::StorageAtomicExchange64:
156 case Opcode::StorageAtomicAddF32:
157 case Opcode::StorageAtomicAddF16x2:
158 case Opcode::StorageAtomicAddF32x2:
159 case Opcode::StorageAtomicMinF16x2:
160 case Opcode::StorageAtomicMinF32x2:
161 case Opcode::StorageAtomicMaxF16x2:
162 case Opcode::StorageAtomicMaxF32x2:
163 case Opcode::BindlessImageWrite:
164 case Opcode::BoundImageWrite:
165 case Opcode::ImageWrite:
166 case IR::Opcode::BindlessImageAtomicIAdd32:
167 case IR::Opcode::BindlessImageAtomicSMin32:
168 case IR::Opcode::BindlessImageAtomicUMin32:
169 case IR::Opcode::BindlessImageAtomicSMax32:
170 case IR::Opcode::BindlessImageAtomicUMax32:
171 case IR::Opcode::BindlessImageAtomicInc32:
172 case IR::Opcode::BindlessImageAtomicDec32:
173 case IR::Opcode::BindlessImageAtomicAnd32:
174 case IR::Opcode::BindlessImageAtomicOr32:
175 case IR::Opcode::BindlessImageAtomicXor32:
176 case IR::Opcode::BindlessImageAtomicExchange32:
177 case IR::Opcode::BoundImageAtomicIAdd32:
178 case IR::Opcode::BoundImageAtomicSMin32:
179 case IR::Opcode::BoundImageAtomicUMin32:
180 case IR::Opcode::BoundImageAtomicSMax32:
181 case IR::Opcode::BoundImageAtomicUMax32:
182 case IR::Opcode::BoundImageAtomicInc32:
183 case IR::Opcode::BoundImageAtomicDec32:
184 case IR::Opcode::BoundImageAtomicAnd32:
185 case IR::Opcode::BoundImageAtomicOr32:
186 case IR::Opcode::BoundImageAtomicXor32:
187 case IR::Opcode::BoundImageAtomicExchange32:
188 case IR::Opcode::ImageAtomicIAdd32:
189 case IR::Opcode::ImageAtomicSMin32:
190 case IR::Opcode::ImageAtomicUMin32:
191 case IR::Opcode::ImageAtomicSMax32:
192 case IR::Opcode::ImageAtomicUMax32:
193 case IR::Opcode::ImageAtomicInc32:
194 case IR::Opcode::ImageAtomicDec32:
195 case IR::Opcode::ImageAtomicAnd32:
196 case IR::Opcode::ImageAtomicOr32:
197 case IR::Opcode::ImageAtomicXor32:
198 case IR::Opcode::ImageAtomicExchange32:
199 return true;
200 default:
201 return false;
202 }
203}
204
205bool Inst::IsPseudoInstruction() const noexcept {
206 switch (op) {
207 case Opcode::GetZeroFromOp:
208 case Opcode::GetSignFromOp:
209 case Opcode::GetCarryFromOp:
210 case Opcode::GetOverflowFromOp:
211 case Opcode::GetSparseFromOp:
212 case Opcode::GetInBoundsFromOp:
213 return true;
214 default:
215 return false;
216 }
217}
218
219bool Inst::AreAllArgsImmediates() const {
220 if (op == Opcode::Phi) {
221        throw LogicError("Testing whether all arguments are immediates on a phi instruction");
222 }
223 return std::all_of(args.begin(), args.begin() + NumArgs(),
224 [](const IR::Value& value) { return value.IsImmediate(); });
225}
226
227Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
228 if (!associated_insts) {
229 return nullptr;
230 }
231 switch (opcode) {
232 case Opcode::GetZeroFromOp:
233 CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
234 return associated_insts->zero_inst;
235 case Opcode::GetSignFromOp:
236 CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
237 return associated_insts->sign_inst;
238 case Opcode::GetCarryFromOp:
239 CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
240 return associated_insts->carry_inst;
241 case Opcode::GetOverflowFromOp:
242 CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
243 return associated_insts->overflow_inst;
244 case Opcode::GetSparseFromOp:
245 CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
246 return associated_insts->sparse_inst;
247 case Opcode::GetInBoundsFromOp:
248 CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
249 return associated_insts->in_bounds_inst;
250 default:
251 throw InvalidArgument("{} is not a pseudo-instruction", opcode);
252 }
253}
254
255IR::Type Inst::Type() const {
256 return TypeOf(op);
257}
258
259void Inst::SetArg(size_t index, Value value) {
260 if (index >= NumArgs()) {
261 throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
262 }
263 const IR::Value arg{Arg(index)};
264 if (!arg.IsImmediate()) {
265 UndoUse(arg);
266 }
267 if (!value.IsImmediate()) {
268 Use(value);
269 }
270 if (op == Opcode::Phi) {
271 phi_args[index].second = value;
272 } else {
273 args[index] = value;
274 }
275}
276
277Block* Inst::PhiBlock(size_t index) const {
278 if (op != Opcode::Phi) {
279 throw LogicError("{} is not a Phi instruction", op);
280 }
281 if (index >= phi_args.size()) {
282        throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
283 }
284 return phi_args[index].first;
285}
286
287void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
288 if (!value.IsImmediate()) {
289 Use(value);
290 }
291 phi_args.emplace_back(predecessor, value);
292}
293
294void Inst::Invalidate() {
295 ClearArgs();
296 ReplaceOpcode(Opcode::Void);
297}
298
299void Inst::ClearArgs() {
300 if (op == Opcode::Phi) {
301 for (auto& pair : phi_args) {
302 IR::Value& value{pair.second};
303 if (!value.IsImmediate()) {
304 UndoUse(value);
305 }
306 }
307 phi_args.clear();
308 } else {
309 for (auto& value : args) {
310 if (!value.IsImmediate()) {
311 UndoUse(value);
312 }
313 }
314 // Reset arguments to null
315        // std::memset was measured to be faster on MSVC than std::ranges::fill
316 std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
317 }
318}
319
320void Inst::ReplaceUsesWith(Value replacement) {
321 Invalidate();
322 ReplaceOpcode(Opcode::Identity);
323 if (!replacement.IsImmediate()) {
324 Use(replacement);
325 }
326 args[0] = replacement;
327}
328
329void Inst::ReplaceOpcode(IR::Opcode opcode) {
330 if (opcode == IR::Opcode::Phi) {
331 throw LogicError("Cannot transition into Phi");
332 }
333 if (op == Opcode::Phi) {
334 // Transition out of phi arguments into non-phi
335 std::destroy_at(&phi_args);
336 std::construct_at(&args);
337 }
338 op = opcode;
339}
340
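// Registers a new use of 'value'. Besides incrementing the use count, pseudo-instructions
// such as GetZeroFromOp attach themselves to the instruction they observe so that
// GetAssociatedPseudoOperation can later retrieve them.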
341void Inst::Use(const Value& value) {
342 Inst* const inst{value.Inst()};
343 ++inst->use_count;
344
345 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
346 switch (op) {
347 case Opcode::GetZeroFromOp:
348 AllocAssociatedInsts(assoc_inst);
349 SetPseudoInstruction(assoc_inst->zero_inst, this);
350 break;
351 case Opcode::GetSignFromOp:
352 AllocAssociatedInsts(assoc_inst);
353 SetPseudoInstruction(assoc_inst->sign_inst, this);
354 break;
355 case Opcode::GetCarryFromOp:
356 AllocAssociatedInsts(assoc_inst);
357 SetPseudoInstruction(assoc_inst->carry_inst, this);
358 break;
359 case Opcode::GetOverflowFromOp:
360 AllocAssociatedInsts(assoc_inst);
361 SetPseudoInstruction(assoc_inst->overflow_inst, this);
362 break;
363 case Opcode::GetSparseFromOp:
364 AllocAssociatedInsts(assoc_inst);
365 SetPseudoInstruction(assoc_inst->sparse_inst, this);
366 break;
367 case Opcode::GetInBoundsFromOp:
368 AllocAssociatedInsts(assoc_inst);
369 SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
370 break;
371 default:
372 break;
373 }
374}
375
376void Inst::UndoUse(const Value& value) {
377 Inst* const inst{value.Inst()};
378 --inst->use_count;
379
380 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
381 switch (op) {
382 case Opcode::GetZeroFromOp:
383 AllocAssociatedInsts(assoc_inst);
384 RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
385 break;
386 case Opcode::GetSignFromOp:
387 AllocAssociatedInsts(assoc_inst);
388 RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
389 break;
390 case Opcode::GetCarryFromOp:
391 AllocAssociatedInsts(assoc_inst);
392 RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
393 break;
394 case Opcode::GetOverflowFromOp:
395 AllocAssociatedInsts(assoc_inst);
396 RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
397 break;
398 case Opcode::GetSparseFromOp:
399 AllocAssociatedInsts(assoc_inst);
400 RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
401 break;
402 case Opcode::GetInBoundsFromOp:
403 AllocAssociatedInsts(assoc_inst);
404 RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
405 break;
406 default:
407 break;
408 }
409}
410
411} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..77cda1f8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,49 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/shader_info.h"
10
11namespace Shader::IR {
12
13enum class FmzMode : u8 {
14 DontCare, // Not specified for this instruction
15 FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
16 FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
17 None, // Denorms are not flushed, NAN is propagated (nouveau)
18};
19
20enum class FpRounding : u8 {
21 DontCare, // Not specified for this instruction
22    RN,       // Round to nearest even
23 RM, // Round towards negative infinity
24 RP, // Round towards positive infinity
25 RZ, // Round towards zero
26};
27
28struct FpControl {
29 bool no_contraction{false};
30 FpRounding rounding{FpRounding::DontCare};
31 FmzMode fmz_mode{FmzMode::DontCare};
32};
33static_assert(sizeof(FpControl) <= sizeof(u32));
34
35union TextureInstInfo {
36 u32 raw;
37 BitField<0, 16, u32> descriptor_index;
38 BitField<16, 3, TextureType> type;
39 BitField<19, 1, u32> is_depth;
40 BitField<20, 1, u32> has_bias;
41 BitField<21, 1, u32> has_lod_clamp;
42 BitField<22, 1, u32> relaxed_precision;
43 BitField<23, 2, u32> gather_component;
44 BitField<25, 2, u32> num_derivates;
45 BitField<27, 3, ImageFormat> image_format;
46};
47static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
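// Both FpControl and TextureInstInfo are kept within 32 bits so they can be stored
// directly in an instruction's flags field.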
48
49} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
new file mode 100644
index 000000000..24d024ad7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -0,0 +1,15 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/frontend/ir/opcodes.h"
8
9namespace Shader::IR {
10
11std::string_view NameOf(Opcode op) {
12 return Detail::META_TABLE[static_cast<size_t>(op)].name;
13}
14
15} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
new file mode 100644
index 000000000..9ab108292
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <string_view>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/frontend/ir/type.h"
14
15namespace Shader::IR {
16
17enum class Opcode {
18#define OPCODE(name, ...) name,
19#include "opcodes.inc"
20#undef OPCODE
21};
22
23namespace Detail {
24struct OpcodeMeta {
25 std::string_view name;
26 Type type;
27 std::array<Type, 5> arg_types;
28};
29
30// using enum Type;
31constexpr Type Void{Type::Void};
32constexpr Type Opaque{Type::Opaque};
33constexpr Type Reg{Type::Reg};
34constexpr Type Pred{Type::Pred};
35constexpr Type Attribute{Type::Attribute};
36constexpr Type Patch{Type::Patch};
37constexpr Type U1{Type::U1};
38constexpr Type U8{Type::U8};
39constexpr Type U16{Type::U16};
40constexpr Type U32{Type::U32};
41constexpr Type U64{Type::U64};
42constexpr Type F16{Type::F16};
43constexpr Type F32{Type::F32};
44constexpr Type F64{Type::F64};
45constexpr Type U32x2{Type::U32x2};
46constexpr Type U32x3{Type::U32x3};
47constexpr Type U32x4{Type::U32x4};
48constexpr Type F16x2{Type::F16x2};
49constexpr Type F16x3{Type::F16x3};
50constexpr Type F16x4{Type::F16x4};
51constexpr Type F32x2{Type::F32x2};
52constexpr Type F32x3{Type::F32x3};
53constexpr Type F32x4{Type::F32x4};
54constexpr Type F64x2{Type::F64x2};
55constexpr Type F64x3{Type::F64x3};
56constexpr Type F64x4{Type::F64x4};
57
58constexpr OpcodeMeta META_TABLE[]{
59#define OPCODE(name_token, type_token, ...) \
60 { \
61 .name{#name_token}, \
62 .type = type_token, \
63 .arg_types{__VA_ARGS__}, \
64 },
65#include "opcodes.inc"
66#undef OPCODE
67};
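// The number of arguments of an opcode is the count of leading non-Void entries in its
// arg_types row; unused slots default to Type::Void.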
68constexpr size_t CalculateNumArgsOf(Opcode op) {
69 const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
70 return static_cast<size_t>(
71 std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
72}
73
74constexpr u8 NUM_ARGS[]{
75#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
76#include "opcodes.inc"
77#undef OPCODE
78};
79} // namespace Detail
80
81/// Get return type of an opcode
82[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
83 return Detail::META_TABLE[static_cast<size_t>(op)].type;
84}
85
86/// Get the number of arguments an opcode accepts
87[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
88 return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
89}
90
91/// Get the required type of an argument of an opcode
92[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
93 return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
94}
95
96/// Get the name of an opcode
97[[nodiscard]] std::string_view NameOf(Opcode op);
98
99} // namespace Shader::IR
100
101template <>
102struct fmt::formatter<Shader::IR::Opcode> {
103 constexpr auto parse(format_parse_context& ctx) {
104 return ctx.begin();
105 }
106 template <typename FormatContext>
107 auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
108 return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
109 }
110};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
6OPCODE(Phi, Opaque, )
7OPCODE(Identity, Opaque, Opaque, )
8OPCODE(Void, Void, )
9OPCODE(ConditionRef, U1, U1, )
10OPCODE(Reference, Void, Opaque, )
11OPCODE(PhiMove, Void, Opaque, Opaque, )
12
13// Special operations
14OPCODE(Prologue, Void, )
15OPCODE(Epilogue, Void, )
16OPCODE(Join, Void, )
17OPCODE(DemoteToHelperInvocation, Void, )
18OPCODE(EmitVertex, Void, U32, )
19OPCODE(EndPrimitive, Void, U32, )
20
21// Barriers
22OPCODE(Barrier, Void, )
23OPCODE(WorkgroupMemoryBarrier, Void, )
24OPCODE(DeviceMemoryBarrier, Void, )
25
26// Context getters/setters
27OPCODE(GetRegister, U32, Reg, )
28OPCODE(SetRegister, Void, Reg, U32, )
29OPCODE(GetPred, U1, Pred, )
30OPCODE(SetPred, Void, Pred, U1, )
31OPCODE(GetGotoVariable, U1, U32, )
32OPCODE(SetGotoVariable, Void, U32, U1, )
33OPCODE(GetIndirectBranchVariable, U32, )
34OPCODE(SetIndirectBranchVariable, Void, U32, )
35OPCODE(GetCbufU8, U32, U32, U32, )
36OPCODE(GetCbufS8, U32, U32, U32, )
37OPCODE(GetCbufU16, U32, U32, U32, )
38OPCODE(GetCbufS16, U32, U32, U32, )
39OPCODE(GetCbufU32, U32, U32, U32, )
40OPCODE(GetCbufF32, F32, U32, U32, )
41OPCODE(GetCbufU32x2, U32x2, U32, U32, )
42OPCODE(GetAttribute, F32, Attribute, U32, )
43OPCODE(SetAttribute, Void, Attribute, F32, U32, )
44OPCODE(GetAttributeIndexed, F32, U32, U32, )
45OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
46OPCODE(GetPatch, F32, Patch, )
47OPCODE(SetPatch, Void, Patch, F32, )
48OPCODE(SetFragColor, Void, U32, U32, F32, )
49OPCODE(SetSampleMask, Void, U32, )
50OPCODE(SetFragDepth, Void, F32, )
51OPCODE(GetZFlag, U1, Void, )
52OPCODE(GetSFlag, U1, Void, )
53OPCODE(GetCFlag, U1, Void, )
54OPCODE(GetOFlag, U1, Void, )
55OPCODE(SetZFlag, Void, U1, )
56OPCODE(SetSFlag, Void, U1, )
57OPCODE(SetCFlag, Void, U1, )
58OPCODE(SetOFlag, Void, U1, )
59OPCODE(WorkgroupId, U32x3, )
60OPCODE(LocalInvocationId, U32x3, )
61OPCODE(InvocationId, U32, )
62OPCODE(SampleId, U32, )
63OPCODE(IsHelperInvocation, U1, )
64OPCODE(YDirection, F32, )
65
66// Undefined
67OPCODE(UndefU1, U1, )
68OPCODE(UndefU8, U8, )
69OPCODE(UndefU16, U16, )
70OPCODE(UndefU32, U32, )
71OPCODE(UndefU64, U64, )
72
73// Memory operations
74OPCODE(LoadGlobalU8, U32, Opaque, )
75OPCODE(LoadGlobalS8, U32, Opaque, )
76OPCODE(LoadGlobalU16, U32, Opaque, )
77OPCODE(LoadGlobalS16, U32, Opaque, )
78OPCODE(LoadGlobal32, U32, Opaque, )
79OPCODE(LoadGlobal64, U32x2, Opaque, )
80OPCODE(LoadGlobal128, U32x4, Opaque, )
81OPCODE(WriteGlobalU8, Void, Opaque, U32, )
82OPCODE(WriteGlobalS8, Void, Opaque, U32, )
83OPCODE(WriteGlobalU16, Void, Opaque, U32, )
84OPCODE(WriteGlobalS16, Void, Opaque, U32, )
85OPCODE(WriteGlobal32, Void, Opaque, U32, )
86OPCODE(WriteGlobal64, Void, Opaque, U32x2, )
87OPCODE(WriteGlobal128, Void, Opaque, U32x4, )
88
89// Storage buffer operations
90OPCODE(LoadStorageU8, U32, U32, U32, )
91OPCODE(LoadStorageS8, U32, U32, U32, )
92OPCODE(LoadStorageU16, U32, U32, U32, )
93OPCODE(LoadStorageS16, U32, U32, U32, )
94OPCODE(LoadStorage32, U32, U32, U32, )
95OPCODE(LoadStorage64, U32x2, U32, U32, )
96OPCODE(LoadStorage128, U32x4, U32, U32, )
97OPCODE(WriteStorageU8, Void, U32, U32, U32, )
98OPCODE(WriteStorageS8, Void, U32, U32, U32, )
99OPCODE(WriteStorageU16, Void, U32, U32, U32, )
100OPCODE(WriteStorageS16, Void, U32, U32, U32, )
101OPCODE(WriteStorage32, Void, U32, U32, U32, )
102OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
103OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
104
105// Local memory operations
106OPCODE(LoadLocal, U32, U32, )
107OPCODE(WriteLocal, Void, U32, U32, )
108
109// Shared memory operations
110OPCODE(LoadSharedU8, U32, U32, )
111OPCODE(LoadSharedS8, U32, U32, )
112OPCODE(LoadSharedU16, U32, U32, )
113OPCODE(LoadSharedS16, U32, U32, )
114OPCODE(LoadSharedU32, U32, U32, )
115OPCODE(LoadSharedU64, U32x2, U32, )
116OPCODE(LoadSharedU128, U32x4, U32, )
117OPCODE(WriteSharedU8, Void, U32, U32, )
118OPCODE(WriteSharedU16, Void, U32, U32, )
119OPCODE(WriteSharedU32, Void, U32, U32, )
120OPCODE(WriteSharedU64, Void, U32, U32x2, )
121OPCODE(WriteSharedU128, Void, U32, U32x4, )
122
123// Vector utility
124OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
125OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
126OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
127OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
128OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
129OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
130OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
131OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
132OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
133OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
134OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
135OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
136OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
137OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
138OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
139OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
140OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
141OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
142OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
143OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
144OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
145OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
146OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
147OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
148OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
149OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
150OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
151OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
152OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
153OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
154OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
155OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
156OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
157OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
158OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
159OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
160
161// Select operations
162OPCODE(SelectU1, U1, U1, U1, U1, )
163OPCODE(SelectU8, U8, U1, U8, U8, )
164OPCODE(SelectU16, U16, U1, U16, U16, )
165OPCODE(SelectU32, U32, U1, U32, U32, )
166OPCODE(SelectU64, U64, U1, U64, U64, )
167OPCODE(SelectF16, F16, U1, F16, F16, )
168OPCODE(SelectF32, F32, U1, F32, F32, )
169OPCODE(SelectF64, F64, U1, F64, F64, )
170
171// Bitwise conversions
172OPCODE(BitCastU16F16, U16, F16, )
173OPCODE(BitCastU32F32, U32, F32, )
174OPCODE(BitCastU64F64, U64, F64, )
175OPCODE(BitCastF16U16, F16, U16, )
176OPCODE(BitCastF32U32, F32, U32, )
177OPCODE(BitCastF64U64, F64, U64, )
178OPCODE(PackUint2x32, U64, U32x2, )
179OPCODE(UnpackUint2x32, U32x2, U64, )
180OPCODE(PackFloat2x16, U32, F16x2, )
181OPCODE(UnpackFloat2x16, F16x2, U32, )
182OPCODE(PackHalf2x16, U32, F32x2, )
183OPCODE(UnpackHalf2x16, F32x2, U32, )
184OPCODE(PackDouble2x32, F64, U32x2, )
185OPCODE(UnpackDouble2x32, U32x2, F64, )
186
187// Pseudo-operation, handled specially at final emit
188OPCODE(GetZeroFromOp, U1, Opaque, )
189OPCODE(GetSignFromOp, U1, Opaque, )
190OPCODE(GetCarryFromOp, U1, Opaque, )
191OPCODE(GetOverflowFromOp, U1, Opaque, )
192OPCODE(GetSparseFromOp, U1, Opaque, )
193OPCODE(GetInBoundsFromOp, U1, Opaque, )
194
195// Floating-point operations
196OPCODE(FPAbs16, F16, F16, )
197OPCODE(FPAbs32, F32, F32, )
198OPCODE(FPAbs64, F64, F64, )
199OPCODE(FPAdd16, F16, F16, F16, )
200OPCODE(FPAdd32, F32, F32, F32, )
201OPCODE(FPAdd64, F64, F64, F64, )
202OPCODE(FPFma16, F16, F16, F16, F16, )
203OPCODE(FPFma32, F32, F32, F32, F32, )
204OPCODE(FPFma64, F64, F64, F64, F64, )
205OPCODE(FPMax32, F32, F32, F32, )
206OPCODE(FPMax64, F64, F64, F64, )
207OPCODE(FPMin32, F32, F32, F32, )
208OPCODE(FPMin64, F64, F64, F64, )
209OPCODE(FPMul16, F16, F16, F16, )
210OPCODE(FPMul32, F32, F32, F32, )
211OPCODE(FPMul64, F64, F64, F64, )
212OPCODE(FPNeg16, F16, F16, )
213OPCODE(FPNeg32, F32, F32, )
214OPCODE(FPNeg64, F64, F64, )
215OPCODE(FPRecip32, F32, F32, )
216OPCODE(FPRecip64, F64, F64, )
217OPCODE(FPRecipSqrt32, F32, F32, )
218OPCODE(FPRecipSqrt64, F64, F64, )
219OPCODE(FPSqrt, F32, F32, )
220OPCODE(FPSin, F32, F32, )
221OPCODE(FPExp2, F32, F32, )
222OPCODE(FPCos, F32, F32, )
223OPCODE(FPLog2, F32, F32, )
224OPCODE(FPSaturate16, F16, F16, )
225OPCODE(FPSaturate32, F32, F32, )
226OPCODE(FPSaturate64, F64, F64, )
227OPCODE(FPClamp16, F16, F16, F16, F16, )
228OPCODE(FPClamp32, F32, F32, F32, F32, )
229OPCODE(FPClamp64, F64, F64, F64, F64, )
230OPCODE(FPRoundEven16, F16, F16, )
231OPCODE(FPRoundEven32, F32, F32, )
232OPCODE(FPRoundEven64, F64, F64, )
233OPCODE(FPFloor16, F16, F16, )
234OPCODE(FPFloor32, F32, F32, )
235OPCODE(FPFloor64, F64, F64, )
236OPCODE(FPCeil16, F16, F16, )
237OPCODE(FPCeil32, F32, F32, )
238OPCODE(FPCeil64, F64, F64, )
239OPCODE(FPTrunc16, F16, F16, )
240OPCODE(FPTrunc32, F32, F32, )
241OPCODE(FPTrunc64, F64, F64, )
242
243OPCODE(FPOrdEqual16, U1, F16, F16, )
244OPCODE(FPOrdEqual32, U1, F32, F32, )
245OPCODE(FPOrdEqual64, U1, F64, F64, )
246OPCODE(FPUnordEqual16, U1, F16, F16, )
247OPCODE(FPUnordEqual32, U1, F32, F32, )
248OPCODE(FPUnordEqual64, U1, F64, F64, )
249OPCODE(FPOrdNotEqual16, U1, F16, F16, )
250OPCODE(FPOrdNotEqual32, U1, F32, F32, )
251OPCODE(FPOrdNotEqual64, U1, F64, F64, )
252OPCODE(FPUnordNotEqual16, U1, F16, F16, )
253OPCODE(FPUnordNotEqual32, U1, F32, F32, )
254OPCODE(FPUnordNotEqual64, U1, F64, F64, )
255OPCODE(FPOrdLessThan16, U1, F16, F16, )
256OPCODE(FPOrdLessThan32, U1, F32, F32, )
257OPCODE(FPOrdLessThan64, U1, F64, F64, )
258OPCODE(FPUnordLessThan16, U1, F16, F16, )
259OPCODE(FPUnordLessThan32, U1, F32, F32, )
260OPCODE(FPUnordLessThan64, U1, F64, F64, )
261OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
262OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
263OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
264OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
265OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
266OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
267OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
268OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
269OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
270OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
271OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
272OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
273OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
274OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
275OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
276OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
277OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
278OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
279OPCODE(FPIsNan16, U1, F16, )
280OPCODE(FPIsNan32, U1, F32, )
281OPCODE(FPIsNan64, U1, F64, )
282
283// Integer operations
284OPCODE(IAdd32, U32, U32, U32, )
285OPCODE(IAdd64, U64, U64, U64, )
286OPCODE(ISub32, U32, U32, U32, )
287OPCODE(ISub64, U64, U64, U64, )
288OPCODE(IMul32, U32, U32, U32, )
289OPCODE(INeg32, U32, U32, )
290OPCODE(INeg64, U64, U64, )
291OPCODE(IAbs32, U32, U32, )
292OPCODE(ShiftLeftLogical32, U32, U32, U32, )
293OPCODE(ShiftLeftLogical64, U64, U64, U32, )
294OPCODE(ShiftRightLogical32, U32, U32, U32, )
295OPCODE(ShiftRightLogical64, U64, U64, U32, )
296OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
297OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
298OPCODE(BitwiseAnd32, U32, U32, U32, )
299OPCODE(BitwiseOr32, U32, U32, U32, )
300OPCODE(BitwiseXor32, U32, U32, U32, )
301OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
302OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
303OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
304OPCODE(BitReverse32, U32, U32, )
305OPCODE(BitCount32, U32, U32, )
306OPCODE(BitwiseNot32, U32, U32, )
307
308OPCODE(FindSMsb32, U32, U32, )
309OPCODE(FindUMsb32, U32, U32, )
310OPCODE(SMin32, U32, U32, U32, )
311OPCODE(UMin32, U32, U32, U32, )
312OPCODE(SMax32, U32, U32, U32, )
313OPCODE(UMax32, U32, U32, U32, )
314OPCODE(SClamp32, U32, U32, U32, U32, )
315OPCODE(UClamp32, U32, U32, U32, U32, )
316OPCODE(SLessThan, U1, U32, U32, )
317OPCODE(ULessThan, U1, U32, U32, )
318OPCODE(IEqual, U1, U32, U32, )
319OPCODE(SLessThanEqual, U1, U32, U32, )
320OPCODE(ULessThanEqual, U1, U32, U32, )
321OPCODE(SGreaterThan, U1, U32, U32, )
322OPCODE(UGreaterThan, U1, U32, U32, )
323OPCODE(INotEqual, U1, U32, U32, )
324OPCODE(SGreaterThanEqual, U1, U32, U32, )
325OPCODE(UGreaterThanEqual, U1, U32, U32, )
326
327// Atomic operations
328OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
329OPCODE(SharedAtomicSMin32, U32, U32, U32, )
330OPCODE(SharedAtomicUMin32, U32, U32, U32, )
331OPCODE(SharedAtomicSMax32, U32, U32, U32, )
332OPCODE(SharedAtomicUMax32, U32, U32, U32, )
333OPCODE(SharedAtomicInc32, U32, U32, U32, )
334OPCODE(SharedAtomicDec32, U32, U32, U32, )
335OPCODE(SharedAtomicAnd32, U32, U32, U32, )
336OPCODE(SharedAtomicOr32, U32, U32, U32, )
337OPCODE(SharedAtomicXor32, U32, U32, U32, )
338OPCODE(SharedAtomicExchange32, U32, U32, U32, )
339OPCODE(SharedAtomicExchange64, U64, U32, U64, )
340
341OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
342OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
343OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
344OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
345OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
346OPCODE(GlobalAtomicInc32, U32, U64, U32, )
347OPCODE(GlobalAtomicDec32, U32, U64, U32, )
348OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
349OPCODE(GlobalAtomicOr32, U32, U64, U32, )
350OPCODE(GlobalAtomicXor32, U32, U64, U32, )
351OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
352OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
353OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
354OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
355OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
356OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
357OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
358OPCODE(GlobalAtomicOr64, U64, U64, U64, )
359OPCODE(GlobalAtomicXor64, U64, U64, U64, )
360OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
361OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
362OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
363OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
364OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
365OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
366OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
367OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
368
369OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
370OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
371OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
372OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
373OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
374OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
375OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
376OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
377OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
378OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
379OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
380OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
381OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
382OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
383OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
384OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
385OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
386OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
387OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
388OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
389OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
390OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
391OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
392OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
393OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
394OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
395OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
396
397// Logical operations
398OPCODE(LogicalOr, U1, U1, U1, )
399OPCODE(LogicalAnd, U1, U1, U1, )
400OPCODE(LogicalXor, U1, U1, U1, )
401OPCODE(LogicalNot, U1, U1, )
402
403// Conversion operations
404OPCODE(ConvertS16F16, U32, F16, )
405OPCODE(ConvertS16F32, U32, F32, )
406OPCODE(ConvertS16F64, U32, F64, )
407OPCODE(ConvertS32F16, U32, F16, )
408OPCODE(ConvertS32F32, U32, F32, )
409OPCODE(ConvertS32F64, U32, F64, )
410OPCODE(ConvertS64F16, U64, F16, )
411OPCODE(ConvertS64F32, U64, F32, )
412OPCODE(ConvertS64F64, U64, F64, )
413OPCODE(ConvertU16F16, U32, F16, )
414OPCODE(ConvertU16F32, U32, F32, )
415OPCODE(ConvertU16F64, U32, F64, )
416OPCODE(ConvertU32F16, U32, F16, )
417OPCODE(ConvertU32F32, U32, F32, )
418OPCODE(ConvertU32F64, U32, F64, )
419OPCODE(ConvertU64F16, U64, F16, )
420OPCODE(ConvertU64F32, U64, F32, )
421OPCODE(ConvertU64F64, U64, F64, )
422OPCODE(ConvertU64U32, U64, U32, )
423OPCODE(ConvertU32U64, U32, U64, )
424OPCODE(ConvertF16F32, F16, F32, )
425OPCODE(ConvertF32F16, F32, F16, )
426OPCODE(ConvertF32F64, F32, F64, )
427OPCODE(ConvertF64F32, F64, F32, )
428OPCODE(ConvertF16S8, F16, U32, )
429OPCODE(ConvertF16S16, F16, U32, )
430OPCODE(ConvertF16S32, F16, U32, )
431OPCODE(ConvertF16S64, F16, U64, )
432OPCODE(ConvertF16U8, F16, U32, )
433OPCODE(ConvertF16U16, F16, U32, )
434OPCODE(ConvertF16U32, F16, U32, )
435OPCODE(ConvertF16U64, F16, U64, )
436OPCODE(ConvertF32S8, F32, U32, )
437OPCODE(ConvertF32S16, F32, U32, )
438OPCODE(ConvertF32S32, F32, U32, )
439OPCODE(ConvertF32S64, F32, U64, )
440OPCODE(ConvertF32U8, F32, U32, )
441OPCODE(ConvertF32U16, F32, U32, )
442OPCODE(ConvertF32U32, F32, U32, )
443OPCODE(ConvertF32U64, F32, U64, )
444OPCODE(ConvertF64S8, F64, U32, )
445OPCODE(ConvertF64S16, F64, U32, )
446OPCODE(ConvertF64S32, F64, U32, )
447OPCODE(ConvertF64S64, F64, U64, )
448OPCODE(ConvertF64U8, F64, U32, )
449OPCODE(ConvertF64U16, F64, U32, )
450OPCODE(ConvertF64U32, F64, U32, )
451OPCODE(ConvertF64U64, F64, U64, )
452
453// Image operations
454OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
455OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
456OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
457OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
458OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
459OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
460OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
461OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
462OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
463OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
464OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
465OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, )
466
467OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
468OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
469OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
470OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
471OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
472OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
473OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
474OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
475OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
476OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
477OPCODE(BoundImageRead, U32x4, U32, Opaque, )
478OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, )
479
480OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
481OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
482OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
483OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
484OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
485OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
486OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
487OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
488OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
489OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
490OPCODE(ImageRead, U32x4, Opaque, Opaque, )
491OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
492
493// Atomic Image operations
494
495OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
496OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, )
497OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, )
498OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, )
499OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, )
500OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, )
501OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, )
502OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, )
503OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, )
504OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, )
505OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, )
506
507OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, )
508OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, )
509OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, )
510OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, )
511OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, )
512OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, )
513OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, )
514OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, )
515OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, )
516OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, )
517OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, )
518
519OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
520OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
521OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
522OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
523OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
524OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
525OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
526OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
527OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
528OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
529OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
530
531// Warp operations
532OPCODE(LaneId, U32, )
533OPCODE(VoteAll, U1, U1, )
534OPCODE(VoteAny, U1, U1, )
535OPCODE(VoteEqual, U1, U1, )
536OPCODE(SubgroupBallot, U32, U1, )
537OPCODE(SubgroupEqMask, U32, )
538OPCODE(SubgroupLtMask, U32, )
539OPCODE(SubgroupLeMask, U32, )
540OPCODE(SubgroupGtMask, U32, )
541OPCODE(SubgroupGeMask, U32, )
542OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
543OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
544OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
545OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
546OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
547OPCODE(DPdxFine, F32, F32, )
548OPCODE(DPdyFine, F32, F32, )
549OPCODE(DPdxCoarse, F32, F32, )
550OPCODE(DPdyCoarse, F32, F32, )
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp
new file mode 100644
index 000000000..4c956a970
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/ir/patch.h"
7
8namespace Shader::IR {
9
10bool IsGeneric(Patch patch) noexcept {
11 return patch >= Patch::Component0 && patch <= Patch::Component119;
12}
13
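// Generic patch components are packed four per patch attribute: the quotient selects the
// attribute and the remainder selects the component within it.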
14u32 GenericPatchIndex(Patch patch) {
15 if (!IsGeneric(patch)) {
16 throw InvalidArgument("Patch {} is not generic", patch);
17 }
18 return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
19}
20
21u32 GenericPatchElement(Patch patch) {
22 if (!IsGeneric(patch)) {
23 throw InvalidArgument("Patch {} is not generic", patch);
24 }
25 return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
26}
27
28} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h
new file mode 100644
index 000000000..6d66ff0d6
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.h
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader::IR {
10
11enum class Patch : u64 {
12 TessellationLodLeft,
13 TessellationLodTop,
14 TessellationLodRight,
15 TessellationLodBottom,
16 TessellationLodInteriorU,
17 TessellationLodInteriorV,
18 ComponentPadding0,
19 ComponentPadding1,
20 Component0,
21 Component1,
22 Component2,
23 Component3,
24 Component4,
25 Component5,
26 Component6,
27 Component7,
28 Component8,
29 Component9,
30 Component10,
31 Component11,
32 Component12,
33 Component13,
34 Component14,
35 Component15,
36 Component16,
37 Component17,
38 Component18,
39 Component19,
40 Component20,
41 Component21,
42 Component22,
43 Component23,
44 Component24,
45 Component25,
46 Component26,
47 Component27,
48 Component28,
49 Component29,
50 Component30,
51 Component31,
52 Component32,
53 Component33,
54 Component34,
55 Component35,
56 Component36,
57 Component37,
58 Component38,
59 Component39,
60 Component40,
61 Component41,
62 Component42,
63 Component43,
64 Component44,
65 Component45,
66 Component46,
67 Component47,
68 Component48,
69 Component49,
70 Component50,
71 Component51,
72 Component52,
73 Component53,
74 Component54,
75 Component55,
76 Component56,
77 Component57,
78 Component58,
79 Component59,
80 Component60,
81 Component61,
82 Component62,
83 Component63,
84 Component64,
85 Component65,
86 Component66,
87 Component67,
88 Component68,
89 Component69,
90 Component70,
91 Component71,
92 Component72,
93 Component73,
94 Component74,
95 Component75,
96 Component76,
97 Component77,
98 Component78,
99 Component79,
100 Component80,
101 Component81,
102 Component82,
103 Component83,
104 Component84,
105 Component85,
106 Component86,
107 Component87,
108 Component88,
109 Component89,
110 Component90,
111 Component91,
112 Component92,
113 Component93,
114 Component94,
115 Component95,
116 Component96,
117 Component97,
118 Component98,
119 Component99,
120 Component100,
121 Component101,
122 Component102,
123 Component103,
124 Component104,
125 Component105,
126 Component106,
127 Component107,
128 Component108,
129 Component109,
130 Component110,
131 Component111,
132 Component112,
133 Component113,
134 Component114,
135 Component115,
136 Component116,
137 Component117,
138 Component118,
139 Component119,
140};
141static_assert(static_cast<u64>(Patch::Component119) == 127);
142
143[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
144
145[[nodiscard]] u32 GenericPatchIndex(Patch patch);
146
147[[nodiscard]] u32 GenericPatchElement(Patch patch);
148
149} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
new file mode 100644
index 000000000..16bc44101
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include <boost/container/flat_set.hpp>
8#include <boost/container/small_vector.hpp>
9
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/post_order.h"
12
13namespace Shader::IR {
14
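// Iterative depth-first traversal: a block is appended to the output only once all of its
// successors have been visited, producing the blocks in post order.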
15BlockList PostOrder(const AbstractSyntaxNode& root) {
16 boost::container::small_vector<Block*, 16> block_stack;
17 boost::container::flat_set<Block*> visited;
18 BlockList post_order_blocks;
19
20 if (root.type != AbstractSyntaxNode::Type::Block) {
21 throw LogicError("First node in abstract syntax list root is not a block");
22 }
23 Block* const first_block{root.data.block};
24 visited.insert(first_block);
25 block_stack.push_back(first_block);
26
27 while (!block_stack.empty()) {
28 Block* const block{block_stack.back()};
29 const auto visit{[&](Block* branch) {
30 if (!visited.insert(branch).second) {
31 return false;
32 }
33 // Calling push_back twice is faster than insert on MSVC
34 block_stack.push_back(block);
35 block_stack.push_back(branch);
36 return true;
37 }};
38 block_stack.pop_back();
39 if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
40 post_order_blocks.push_back(block);
41 }
42 }
43 return post_order_blocks;
44}
45
46} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h
new file mode 100644
index 000000000..07bfbadc3
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
10namespace Shader::IR {
11
12BlockList PostOrder(const AbstractSyntaxNode& root);
13
14} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
new file mode 100644
index 000000000..4e7f32423
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9namespace Shader::IR {
10
11enum class Pred : u64 {
12 P0,
13 P1,
14 P2,
15 P3,
16 P4,
17 P5,
18 P6,
19 PT,
20};
21
22constexpr size_t NUM_USER_PREDS = 7;
23constexpr size_t NUM_PREDS = 8;
24
25[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
26 return static_cast<size_t>(pred);
27}
28
29} // namespace Shader::IR
30
31template <>
32struct fmt::formatter<Shader::IR::Pred> {
33 constexpr auto parse(format_parse_context& ctx) {
34 return ctx.begin();
35 }
36 template <typename FormatContext>
37 auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
38 if (pred == Shader::IR::Pred::PT) {
39 return fmt::format_to(ctx.out(), "PT");
40 } else {
41 return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
42 }
43 }
44};
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
new file mode 100644
index 000000000..3fc06f855
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <string>
7
8#include <fmt/format.h>
9
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/program.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::IR {
15
16std::string DumpProgram(const Program& program) {
17 size_t index{0};
18 std::map<const IR::Inst*, size_t> inst_to_index;
19 std::map<const IR::Block*, size_t> block_to_index;
20
21 for (const IR::Block* const block : program.blocks) {
22 block_to_index.emplace(block, index);
23 ++index;
24 }
25 std::string ret;
26 for (const auto& block : program.blocks) {
27 ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
28 }
29 return ret;
30}
31
32} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
new file mode 100644
index 000000000..ebcaa8bc2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9
10#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/program_header.h"
13#include "shader_recompiler/shader_info.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader::IR {
17
18struct Program {
19 AbstractSyntaxList syntax_list;
20 BlockList blocks;
21 BlockList post_order_blocks;
22 Info info;
23 Stage stage{};
24 std::array<u32, 3> workgroup_size{};
25 OutputTopology output_topology{};
26 u32 output_vertices{};
27 u32 invocations{};
28 u32 local_memory_size{};
29 u32 shared_memory_size{};
30 bool is_geometry_passthrough{};
31};
32
33[[nodiscard]] std::string DumpProgram(const Program& program);
34
35} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
new file mode 100644
index 000000000..a4b635792
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -0,0 +1,332 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9#include "common/common_types.h"
10#include "shader_recompiler/exception.h"
11
12namespace Shader::IR {
13
14enum class Reg : u64 {
15 R0,
16 R1,
17 R2,
18 R3,
19 R4,
20 R5,
21 R6,
22 R7,
23 R8,
24 R9,
25 R10,
26 R11,
27 R12,
28 R13,
29 R14,
30 R15,
31 R16,
32 R17,
33 R18,
34 R19,
35 R20,
36 R21,
37 R22,
38 R23,
39 R24,
40 R25,
41 R26,
42 R27,
43 R28,
44 R29,
45 R30,
46 R31,
47 R32,
48 R33,
49 R34,
50 R35,
51 R36,
52 R37,
53 R38,
54 R39,
55 R40,
56 R41,
57 R42,
58 R43,
59 R44,
60 R45,
61 R46,
62 R47,
63 R48,
64 R49,
65 R50,
66 R51,
67 R52,
68 R53,
69 R54,
70 R55,
71 R56,
72 R57,
73 R58,
74 R59,
75 R60,
76 R61,
77 R62,
78 R63,
79 R64,
80 R65,
81 R66,
82 R67,
83 R68,
84 R69,
85 R70,
86 R71,
87 R72,
88 R73,
89 R74,
90 R75,
91 R76,
92 R77,
93 R78,
94 R79,
95 R80,
96 R81,
97 R82,
98 R83,
99 R84,
100 R85,
101 R86,
102 R87,
103 R88,
104 R89,
105 R90,
106 R91,
107 R92,
108 R93,
109 R94,
110 R95,
111 R96,
112 R97,
113 R98,
114 R99,
115 R100,
116 R101,
117 R102,
118 R103,
119 R104,
120 R105,
121 R106,
122 R107,
123 R108,
124 R109,
125 R110,
126 R111,
127 R112,
128 R113,
129 R114,
130 R115,
131 R116,
132 R117,
133 R118,
134 R119,
135 R120,
136 R121,
137 R122,
138 R123,
139 R124,
140 R125,
141 R126,
142 R127,
143 R128,
144 R129,
145 R130,
146 R131,
147 R132,
148 R133,
149 R134,
150 R135,
151 R136,
152 R137,
153 R138,
154 R139,
155 R140,
156 R141,
157 R142,
158 R143,
159 R144,
160 R145,
161 R146,
162 R147,
163 R148,
164 R149,
165 R150,
166 R151,
167 R152,
168 R153,
169 R154,
170 R155,
171 R156,
172 R157,
173 R158,
174 R159,
175 R160,
176 R161,
177 R162,
178 R163,
179 R164,
180 R165,
181 R166,
182 R167,
183 R168,
184 R169,
185 R170,
186 R171,
187 R172,
188 R173,
189 R174,
190 R175,
191 R176,
192 R177,
193 R178,
194 R179,
195 R180,
196 R181,
197 R182,
198 R183,
199 R184,
200 R185,
201 R186,
202 R187,
203 R188,
204 R189,
205 R190,
206 R191,
207 R192,
208 R193,
209 R194,
210 R195,
211 R196,
212 R197,
213 R198,
214 R199,
215 R200,
216 R201,
217 R202,
218 R203,
219 R204,
220 R205,
221 R206,
222 R207,
223 R208,
224 R209,
225 R210,
226 R211,
227 R212,
228 R213,
229 R214,
230 R215,
231 R216,
232 R217,
233 R218,
234 R219,
235 R220,
236 R221,
237 R222,
238 R223,
239 R224,
240 R225,
241 R226,
242 R227,
243 R228,
244 R229,
245 R230,
246 R231,
247 R232,
248 R233,
249 R234,
250 R235,
251 R236,
252 R237,
253 R238,
254 R239,
255 R240,
256 R241,
257 R242,
258 R243,
259 R244,
260 R245,
261 R246,
262 R247,
263 R248,
264 R249,
265 R250,
266 R251,
267 R252,
268 R253,
269 R254,
270 RZ,
271};
272static_assert(static_cast<int>(Reg::RZ) == 255);
273
274constexpr size_t NUM_USER_REGS = 255;
275constexpr size_t NUM_REGS = 256;
276
277[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
278 if (reg == Reg::RZ) {
279 // Adding or subtracting registers from RZ yields RZ
280 return Reg::RZ;
281 }
282 const int result{static_cast<int>(reg) + num};
283 if (result >= static_cast<int>(Reg::RZ)) {
284 throw LogicError("Overflow on register arithmetic");
285 }
286 if (result < 0) {
287 throw LogicError("Underflow on register arithmetic");
288 }
289 return static_cast<Reg>(result);
290}
291
292[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
293 return reg + (-num);
294}
295
296constexpr Reg operator++(Reg& reg) {
297 reg = reg + 1;
298 return reg;
299}
300
301constexpr Reg operator++(Reg& reg, int) {
302 const Reg copy{reg};
303 reg = reg + 1;
304 return copy;
305}
306
307[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
308 return static_cast<size_t>(reg);
309}
310
311[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
312 return RegIndex(reg) % align == 0 || reg == Reg::RZ;
313}
314
315} // namespace Shader::IR
316
317template <>
318struct fmt::formatter<Shader::IR::Reg> {
319 constexpr auto parse(format_parse_context& ctx) {
320 return ctx.begin();
321 }
322 template <typename FormatContext>
323 auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
324 if (reg == Shader::IR::Reg::RZ) {
325 return fmt::format_to(ctx.out(), "RZ");
326 } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) {
327 return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg));
328 } else {
329 throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg));
330 }
331 }
332};
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
new file mode 100644
index 000000000..f28341bfe
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7
8#include "shader_recompiler/frontend/ir/type.h"
9
10namespace Shader::IR {
11
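// Type is a bit set; the printed name joins the names of every flag that is present.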
12std::string NameOf(Type type) {
13 static constexpr std::array names{
14        "Opaque", "Reg",   "Pred",  "Attribute", "Patch", "U1",    "U8",    "U16",   "U32",
15        "U64",    "F16",   "F32",   "F64",       "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
16        "F16x4",  "F32x2", "F32x3", "F32x4",     "F64x2", "F64x3", "F64x4",
17 };
18 const size_t bits{static_cast<size_t>(type)};
19 if (bits == 0) {
20 return "Void";
21 }
22 std::string result;
23 for (size_t i = 0; i < names.size(); i++) {
24 if ((bits & (size_t{1} << i)) != 0) {
25 if (!result.empty()) {
26 result += '|';
27 }
28 result += names[i];
29 }
30 }
31 return result;
32}
33
34bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
35 return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
36}
37
38} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
new file mode 100644
index 000000000..294b230c4
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -0,0 +1,61 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "common/common_funcs.h"
12#include "shader_recompiler/exception.h"
13
14namespace Shader::IR {
15
16enum class Type {
17 Void = 0,
18 Opaque = 1 << 0,
19 Reg = 1 << 1,
20 Pred = 1 << 2,
21 Attribute = 1 << 3,
22 Patch = 1 << 4,
23 U1 = 1 << 5,
24 U8 = 1 << 6,
25 U16 = 1 << 7,
26 U32 = 1 << 8,
27 U64 = 1 << 9,
28 F16 = 1 << 10,
29 F32 = 1 << 11,
30 F64 = 1 << 12,
31 U32x2 = 1 << 13,
32 U32x3 = 1 << 14,
33 U32x4 = 1 << 15,
34 F16x2 = 1 << 16,
35 F16x3 = 1 << 17,
36 F16x4 = 1 << 18,
37 F32x2 = 1 << 19,
38 F32x3 = 1 << 20,
39 F32x4 = 1 << 21,
40 F64x2 = 1 << 22,
41 F64x3 = 1 << 23,
42 F64x4 = 1 << 24,
43};
44DECLARE_ENUM_FLAG_OPERATORS(Type)
45
46[[nodiscard]] std::string NameOf(Type type);
47
48[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
49
50} // namespace Shader::IR
51
52template <>
53struct fmt::formatter<Shader::IR::Type> {
54 constexpr auto parse(format_parse_context& ctx) {
55 return ctx.begin();
56 }
57 template <typename FormatContext>
58 auto format(const Shader::IR::Type& type, FormatContext& ctx) {
59 return fmt::format_to(ctx.out(), "{}", NameOf(type));
60 }
61};
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
new file mode 100644
index 000000000..d365ea1bc
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/opcodes.h"
6#include "shader_recompiler/frontend/ir/value.h"
7
8namespace Shader::IR {
9
10Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
11
12Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
13
14Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
15
16Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
17
18Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
19
20Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
21
22Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
23
24Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
25
26Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
27
28Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
29
30Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
31
32Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
33
34IR::Type Value::Type() const noexcept {
35 if (IsPhi()) {
36 // The type of a phi node is stored in its flags
37 return inst->Flags<IR::Type>();
38 }
39 if (IsIdentity()) {
40 return inst->Arg(0).Type();
41 }
42 if (type == Type::Opaque) {
43 return inst->Type();
44 }
45 return type;
46}
47
48bool Value::operator==(const Value& other) const {
49 if (type != other.type) {
50 return false;
51 }
52 switch (type) {
53 case Type::Void:
54 return true;
55 case Type::Opaque:
56 return inst == other.inst;
57 case Type::Reg:
58 return reg == other.reg;
59 case Type::Pred:
60 return pred == other.pred;
61 case Type::Attribute:
62 return attribute == other.attribute;
63 case Type::Patch:
64 return patch == other.patch;
65 case Type::U1:
66 return imm_u1 == other.imm_u1;
67 case Type::U8:
68 return imm_u8 == other.imm_u8;
69 case Type::U16:
70 case Type::F16:
71 return imm_u16 == other.imm_u16;
72 case Type::U32:
73 case Type::F32:
74 return imm_u32 == other.imm_u32;
75 case Type::U64:
76 case Type::F64:
77 return imm_u64 == other.imm_u64;
78 case Type::U32x2:
79 case Type::U32x3:
80 case Type::U32x4:
81 case Type::F16x2:
82 case Type::F16x3:
83 case Type::F16x4:
84 case Type::F32x2:
85 case Type::F32x3:
86 case Type::F32x4:
87 case Type::F64x2:
88 case Type::F64x3:
89 case Type::F64x4:
90 break;
91 }
92 throw LogicError("Invalid type {}", type);
93}
94
95bool Value::operator!=(const Value& other) const {
96 return !operator==(other);
97}
98
99} // namespace Shader::IR
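
For reference, a small sketch (added for illustration, not part of the patch) of what the equality rules above mean in practice; ValueEqualityExample is a hypothetical name:

#include <cassert>

#include "shader_recompiler/frontend/ir/value.h"

static void ValueEqualityExample() {
    using namespace Shader::IR;
    // Immediates of the same type compare by payload
    assert(Value{u32{42}} == Value{u32{42}});
    assert(Value{u32{42}} != Value{u32{7}});
    // Values of different types never compare equal, even with the same raw bits
    assert(Value{u32{0}} != Value{u64{0}});
    // Default-constructed values are Void and compare equal to each other
    assert(Value{} == Value{});
}
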
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
new file mode 100644
index 000000000..0c6bf684d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -0,0 +1,398 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <memory>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include <boost/container/small_vector.hpp>
15#include <boost/intrusive/list.hpp>
16
17#include "common/assert.h"
18#include "common/bit_cast.h"
19#include "common/common_types.h"
20#include "shader_recompiler/exception.h"
21#include "shader_recompiler/frontend/ir/attribute.h"
22#include "shader_recompiler/frontend/ir/opcodes.h"
23#include "shader_recompiler/frontend/ir/patch.h"
24#include "shader_recompiler/frontend/ir/pred.h"
25#include "shader_recompiler/frontend/ir/reg.h"
26#include "shader_recompiler/frontend/ir/type.h"
27#include "shader_recompiler/frontend/ir/value.h"
28
29namespace Shader::IR {
30
31class Block;
32class Inst;
33
34struct AssociatedInsts;
35
36class Value {
37public:
38 Value() noexcept = default;
39 explicit Value(IR::Inst* value) noexcept;
40 explicit Value(IR::Reg value) noexcept;
41 explicit Value(IR::Pred value) noexcept;
42 explicit Value(IR::Attribute value) noexcept;
43 explicit Value(IR::Patch value) noexcept;
44 explicit Value(bool value) noexcept;
45 explicit Value(u8 value) noexcept;
46 explicit Value(u16 value) noexcept;
47 explicit Value(u32 value) noexcept;
48 explicit Value(f32 value) noexcept;
49 explicit Value(u64 value) noexcept;
50 explicit Value(f64 value) noexcept;
51
52 [[nodiscard]] bool IsIdentity() const noexcept;
53 [[nodiscard]] bool IsPhi() const noexcept;
54 [[nodiscard]] bool IsEmpty() const noexcept;
55 [[nodiscard]] bool IsImmediate() const noexcept;
56 [[nodiscard]] IR::Type Type() const noexcept;
57
58 [[nodiscard]] IR::Inst* Inst() const;
59 [[nodiscard]] IR::Inst* InstRecursive() const;
60 [[nodiscard]] IR::Value Resolve() const;
61 [[nodiscard]] IR::Reg Reg() const;
62 [[nodiscard]] IR::Pred Pred() const;
63 [[nodiscard]] IR::Attribute Attribute() const;
64 [[nodiscard]] IR::Patch Patch() const;
65 [[nodiscard]] bool U1() const;
66 [[nodiscard]] u8 U8() const;
67 [[nodiscard]] u16 U16() const;
68 [[nodiscard]] u32 U32() const;
69 [[nodiscard]] f32 F32() const;
70 [[nodiscard]] u64 U64() const;
71 [[nodiscard]] f64 F64() const;
72
73 [[nodiscard]] bool operator==(const Value& other) const;
74 [[nodiscard]] bool operator!=(const Value& other) const;
75
76private:
77 IR::Type type{};
78 union {
79 IR::Inst* inst{};
80 IR::Reg reg;
81 IR::Pred pred;
82 IR::Attribute attribute;
83 IR::Patch patch;
84 bool imm_u1;
85 u8 imm_u8;
86 u16 imm_u16;
87 u32 imm_u32;
88 f32 imm_f32;
89 u64 imm_u64;
90 f64 imm_f64;
91 };
92};
93static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
94static_assert(std::is_trivially_copyable_v<Value>);
95
96template <IR::Type type_>
97class TypedValue : public Value {
98public:
99 TypedValue() = default;
100
101 template <IR::Type other_type>
102 requires((other_type & type_) != IR::Type::Void) explicit(false)
103 TypedValue(const TypedValue<other_type>& value)
104 : Value(value) {}
105
106 explicit TypedValue(const Value& value) : Value(value) {
107 if ((value.Type() & type_) == IR::Type::Void) {
108 throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
109 }
110 }
111
112 explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
113};
114
115class Inst : public boost::intrusive::list_base_hook<> {
116public:
117 explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
118 ~Inst();
119
120 Inst& operator=(const Inst&) = delete;
121 Inst(const Inst&) = delete;
122
123 Inst& operator=(Inst&&) = delete;
124 Inst(Inst&&) = delete;
125
126 /// Get the number of uses this instruction has.
127 [[nodiscard]] int UseCount() const noexcept {
128 return use_count;
129 }
130
131 /// Determines whether this instruction has uses or not.
132 [[nodiscard]] bool HasUses() const noexcept {
133 return use_count > 0;
134 }
135
136 /// Get the opcode this microinstruction represents.
137 [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
138 return op;
139 }
140
141 /// Determines if there is a pseudo-operation associated with this instruction.
142 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
143 return associated_insts != nullptr;
144 }
145
146 /// Determines whether or not this instruction may have side effects.
147 [[nodiscard]] bool MayHaveSideEffects() const noexcept;
148
149 /// Determines whether or not this instruction is a pseudo-instruction.
150 /// Pseudo-instructions depend on their parent instructions for their semantics.
151 [[nodiscard]] bool IsPseudoInstruction() const noexcept;
152
153 /// Determines if all arguments of this instruction are immediates.
154 [[nodiscard]] bool AreAllArgsImmediates() const;
155
156 /// Gets a pseudo-operation associated with this instruction
157 [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
158
159 /// Get the type this instruction returns.
160 [[nodiscard]] IR::Type Type() const;
161
162 /// Get the number of arguments this instruction has.
163 [[nodiscard]] size_t NumArgs() const {
164 return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
165 }
166
167 /// Get the value of a given argument index.
168 [[nodiscard]] Value Arg(size_t index) const noexcept {
169 if (op == IR::Opcode::Phi) {
170 return phi_args[index].second;
171 } else {
172 return args[index];
173 }
174 }
175
176 /// Set the value of a given argument index.
177 void SetArg(size_t index, Value value);
178
179 /// Get a pointer to the block of a phi argument.
180 [[nodiscard]] Block* PhiBlock(size_t index) const;
181 /// Add phi operand to a phi instruction.
182 void AddPhiOperand(Block* predecessor, const Value& value);
183
184 void Invalidate();
185 void ClearArgs();
186
187 void ReplaceUsesWith(Value replacement);
188
189 void ReplaceOpcode(IR::Opcode opcode);
190
191 template <typename FlagsType>
192 requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
193 [[nodiscard]] FlagsType Flags() const noexcept {
194 FlagsType ret;
195 std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
196 return ret;
197 }
198
199 template <typename FlagsType>
200 requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
201 void SetFlags(FlagsType value) noexcept {
202 std::memcpy(&flags, &value, sizeof(value));
203 }
204
205 /// Intrusively store the host definition of this instruction.
206 template <typename DefinitionType>
207 void SetDefinition(DefinitionType def) {
208 definition = Common::BitCast<u32>(def);
209 }
210
211 /// Return the intrusively stored host definition of this instruction.
212 template <typename DefinitionType>
213 [[nodiscard]] DefinitionType Definition() const noexcept {
214 return Common::BitCast<DefinitionType>(definition);
215 }
216
217 /// Destructively remove one reference count from the instruction
218 /// Useful for register allocation
219 void DestructiveRemoveUsage() {
220 --use_count;
221 }
222
223 /// Destructively add usages to the instruction
224 /// Useful for register allocation
225 void DestructiveAddUsage(int count) {
226 use_count += count;
227 }
228
229private:
230 struct NonTriviallyDummy {
231 NonTriviallyDummy() noexcept {}
232 };
233
234 void Use(const Value& value);
235 void UndoUse(const Value& value);
236
237 IR::Opcode op{};
238 int use_count{};
239 u32 flags{};
240 u32 definition{};
241 union {
242 NonTriviallyDummy dummy{};
243 boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
244 std::array<Value, 5> args;
245 };
246 std::unique_ptr<AssociatedInsts> associated_insts;
247};
248static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
249
250struct AssociatedInsts {
251 union {
252 Inst* in_bounds_inst;
253 Inst* sparse_inst;
254 Inst* zero_inst{};
255 };
256 Inst* sign_inst{};
257 Inst* carry_inst{};
258 Inst* overflow_inst{};
259};
260
261using U1 = TypedValue<Type::U1>;
262using U8 = TypedValue<Type::U8>;
263using U16 = TypedValue<Type::U16>;
264using U32 = TypedValue<Type::U32>;
265using U64 = TypedValue<Type::U64>;
266using F16 = TypedValue<Type::F16>;
267using F32 = TypedValue<Type::F32>;
268using F64 = TypedValue<Type::F64>;
269using U32U64 = TypedValue<Type::U32 | Type::U64>;
270using F32F64 = TypedValue<Type::F32 | Type::F64>;
271using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
272using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
273using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
274
275inline bool Value::IsIdentity() const noexcept {
276 return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
277}
278
279inline bool Value::IsPhi() const noexcept {
280 return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
281}
282
283inline bool Value::IsEmpty() const noexcept {
284 return type == Type::Void;
285}
286
287inline bool Value::IsImmediate() const noexcept {
288 IR::Type current_type{type};
289 const IR::Inst* current_inst{inst};
290 while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
291 const Value& arg{current_inst->Arg(0)};
292 current_type = arg.type;
293 current_inst = arg.inst;
294 }
295 return current_type != Type::Opaque;
296}
297
298inline IR::Inst* Value::Inst() const {
299 DEBUG_ASSERT(type == Type::Opaque);
300 return inst;
301}
302
303inline IR::Inst* Value::InstRecursive() const {
304 DEBUG_ASSERT(type == Type::Opaque);
305 if (IsIdentity()) {
306 return inst->Arg(0).InstRecursive();
307 }
308 return inst;
309}
310
311inline IR::Value Value::Resolve() const {
312 if (IsIdentity()) {
313 return inst->Arg(0).Resolve();
314 }
315 return *this;
316}
317
318inline IR::Reg Value::Reg() const {
319 DEBUG_ASSERT(type == Type::Reg);
320 return reg;
321}
322
323inline IR::Pred Value::Pred() const {
324 DEBUG_ASSERT(type == Type::Pred);
325 return pred;
326}
327
328inline IR::Attribute Value::Attribute() const {
329 DEBUG_ASSERT(type == Type::Attribute);
330 return attribute;
331}
332
333inline IR::Patch Value::Patch() const {
334 DEBUG_ASSERT(type == Type::Patch);
335 return patch;
336}
337
338inline bool Value::U1() const {
339 if (IsIdentity()) {
340 return inst->Arg(0).U1();
341 }
342 DEBUG_ASSERT(type == Type::U1);
343 return imm_u1;
344}
345
346inline u8 Value::U8() const {
347 if (IsIdentity()) {
348 return inst->Arg(0).U8();
349 }
350 DEBUG_ASSERT(type == Type::U8);
351 return imm_u8;
352}
353
354inline u16 Value::U16() const {
355 if (IsIdentity()) {
356 return inst->Arg(0).U16();
357 }
358 DEBUG_ASSERT(type == Type::U16);
359 return imm_u16;
360}
361
362inline u32 Value::U32() const {
363 if (IsIdentity()) {
364 return inst->Arg(0).U32();
365 }
366 DEBUG_ASSERT(type == Type::U32);
367 return imm_u32;
368}
369
370inline f32 Value::F32() const {
371 if (IsIdentity()) {
372 return inst->Arg(0).F32();
373 }
374 DEBUG_ASSERT(type == Type::F32);
375 return imm_f32;
376}
377
378inline u64 Value::U64() const {
379 if (IsIdentity()) {
380 return inst->Arg(0).U64();
381 }
382 DEBUG_ASSERT(type == Type::U64);
383 return imm_u64;
384}
385
386inline f64 Value::F64() const {
387 if (IsIdentity()) {
388 return inst->Arg(0).F64();
389 }
390 DEBUG_ASSERT(type == Type::F64);
391 return imm_f64;
392}
393
394[[nodiscard]] inline bool IsPhi(const Inst& inst) {
395 return inst.GetOpcode() == Opcode::Phi;
396}
397
398} // namespace Shader::IR
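
To illustrate the typed wrappers declared above, here is a minimal sketch assuming the exception types from shader_recompiler/exception.h; AsU32 and TypedValueExample are helpers invented for this example, not part of the patch.

#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/value.h"

// Hypothetical helper: wrap an untyped Value as U32.
static Shader::IR::U32 AsU32(const Shader::IR::Value& value) {
    // The checked TypedValue constructor throws InvalidArgument when the
    // value's runtime type does not intersect the requested mask.
    return Shader::IR::U32{value};
}

static void TypedValueExample() {
    using namespace Shader::IR;
    const U32 good{AsU32(Value{u32{1}})}; // fine: U32 & U32 != Void
    // Converting between typed wrappers only needs the masks to intersect,
    // so a plain U32 widens into U32U64 implicitly.
    const U32U64 either{good};
    (void)either;
    try {
        (void)AsU32(Value{f32{1.0f}}); // throws: F32 & U32 == Void
    } catch (const Shader::InvalidArgument&) {
        // expected for an incompatible type
    }
}
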
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..1a954a509
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <optional>
8#include <string>
9#include <utility>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/exception.h"
14#include "shader_recompiler/frontend/maxwell/control_flow.h"
15#include "shader_recompiler/frontend/maxwell/decode.h"
16#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
17#include "shader_recompiler/frontend/maxwell/location.h"
18
19namespace Shader::Maxwell::Flow {
20namespace {
21struct Compare {
22 bool operator()(const Block& lhs, Location rhs) const noexcept {
23 return lhs.begin < rhs;
24 }
25
26 bool operator()(Location lhs, const Block& rhs) const noexcept {
27 return lhs < rhs.begin;
28 }
29
30 bool operator()(const Block& lhs, const Block& rhs) const noexcept {
31 return lhs.begin < rhs.begin;
32 }
33};
34
35u32 BranchOffset(Location pc, Instruction inst) {
36 return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
37}
38
39void Split(Block* old_block, Block* new_block, Location pc) {
40 if (pc <= old_block->begin || pc >= old_block->end) {
41 throw InvalidArgument("Invalid address to split={}", pc);
42 }
43 *new_block = Block{};
44 new_block->begin = pc;
45 new_block->end = old_block->end;
46 new_block->end_class = old_block->end_class;
47 new_block->cond = old_block->cond;
48 new_block->stack = old_block->stack;
49 new_block->branch_true = old_block->branch_true;
50 new_block->branch_false = old_block->branch_false;
51 new_block->function_call = old_block->function_call;
52 new_block->return_block = old_block->return_block;
53 new_block->branch_reg = old_block->branch_reg;
54 new_block->branch_offset = old_block->branch_offset;
55 new_block->indirect_branches = std::move(old_block->indirect_branches);
56
57 const Location old_begin{old_block->begin};
58 Stack old_stack{std::move(old_block->stack)};
59 *old_block = Block{};
60 old_block->begin = old_begin;
61 old_block->end = pc;
62 old_block->end_class = EndClass::Branch;
63 old_block->cond = IR::Condition(true);
64 old_block->stack = old_stack;
65 old_block->branch_true = new_block;
66 old_block->branch_false = nullptr;
67}
68
69Token OpcodeToken(Opcode opcode) {
70 switch (opcode) {
71 case Opcode::PBK:
72 case Opcode::BRK:
73 return Token::PBK;
74 case Opcode::PCNT:
75 case Opcode::CONT:
76 return Token::PCNT;
77 case Opcode::PEXIT:
78 case Opcode::EXIT:
79 return Token::PEXIT;
80 case Opcode::PLONGJMP:
81 case Opcode::LONGJMP:
82 return Token::PLONGJMP;
83 case Opcode::PRET:
84 case Opcode::RET:
85 case Opcode::CAL:
86 return Token::PRET;
87 case Opcode::SSY:
88 case Opcode::SYNC:
89 return Token::SSY;
90 default:
91 throw InvalidArgument("{}", opcode);
92 }
93}
94
95bool IsAbsoluteJump(Opcode opcode) {
96 switch (opcode) {
97 case Opcode::JCAL:
98 case Opcode::JMP:
99 case Opcode::JMX:
100 return true;
101 default:
102 return false;
103 }
104}
105
106bool HasFlowTest(Opcode opcode) {
107 switch (opcode) {
108 case Opcode::BRA:
109 case Opcode::BRX:
110 case Opcode::EXIT:
111 case Opcode::JMP:
112 case Opcode::JMX:
113 case Opcode::KIL:
114 case Opcode::BRK:
115 case Opcode::CONT:
116 case Opcode::LONGJMP:
117 case Opcode::RET:
118 case Opcode::SYNC:
119 return true;
120 case Opcode::CAL:
121 case Opcode::JCAL:
122 return false;
123 default:
124 throw InvalidArgument("Invalid branch {}", opcode);
125 }
126}
127
128std::string NameOf(const Block& block) {
129 if (block.begin.IsVirtual()) {
130 return fmt::format("\"Virtual {}\"", block.begin);
131 } else {
132 return fmt::format("\"{}\"", block.begin);
133 }
134}
135} // Anonymous namespace
136
137void Stack::Push(Token token, Location target) {
138 entries.push_back({
139 .token = token,
140 .target{target},
141 });
142}
143
144std::pair<Location, Stack> Stack::Pop(Token token) const {
145 const std::optional<Location> pc{Peek(token)};
146 if (!pc) {
147 throw LogicError("Token could not be found");
148 }
149 return {*pc, Remove(token)};
150}
151
152std::optional<Location> Stack::Peek(Token token) const {
153 const auto it{std::find_if(entries.rbegin(), entries.rend(),
154 [token](const auto& entry) { return entry.token == token; })};
155 if (it == entries.rend()) {
156 return std::nullopt;
157 }
158 return it->target;
159}
160
161Stack Stack::Remove(Token token) const {
162 const auto it{std::find_if(entries.rbegin(), entries.rend(),
163 [token](const auto& entry) { return entry.token == token; })};
164 const auto pos{std::distance(entries.rbegin(), it)};
165 Stack result;
166 result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
167 return result;
168}
169
170bool Block::Contains(Location pc) const noexcept {
171 return pc >= begin && pc < end;
172}
173
174Function::Function(ObjectPool<Block>& block_pool, Location start_address)
175 : entrypoint{start_address} {
176 Label& label{labels.emplace_back()};
177 label.address = start_address;
178 label.block = block_pool.Create(Block{});
179 label.block->begin = start_address;
180 label.block->end = start_address;
181 label.block->end_class = EndClass::Branch;
182 label.block->cond = IR::Condition(true);
183 label.block->branch_true = nullptr;
184 label.block->branch_false = nullptr;
185}
186
187CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
188 bool exits_to_dispatcher_)
189 : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
190 exits_to_dispatcher_} {
191 if (exits_to_dispatcher) {
192 dispatch_block = block_pool.Create(Block{});
193 dispatch_block->begin = {};
194 dispatch_block->end = {};
195 dispatch_block->end_class = EndClass::Exit;
196 dispatch_block->cond = IR::Condition(true);
197 dispatch_block->stack = {};
198 dispatch_block->branch_true = nullptr;
199 dispatch_block->branch_false = nullptr;
200 }
201 functions.emplace_back(block_pool, start_address);
202 for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
203 while (!functions[function_id].labels.empty()) {
204 Function& function{functions[function_id]};
205 Label label{function.labels.back()};
206 function.labels.pop_back();
207 AnalyzeLabel(function_id, label);
208 }
209 }
210 if (exits_to_dispatcher) {
211 const auto last_block{functions[0].blocks.rbegin()};
212 dispatch_block->begin = last_block->end + 1;
213 dispatch_block->end = last_block->end + 1;
214 functions[0].blocks.insert(*dispatch_block);
215 }
216}
217
218void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
219 if (InspectVisitedBlocks(function_id, label)) {
220 // Label address has been visited
221 return;
222 }
223 // Try to find the next block
224 Function* const function{&functions[function_id]};
225 Location pc{label.address};
226 const auto next_it{function->blocks.upper_bound(pc, Compare{})};
227 const bool is_last{next_it == function->blocks.end()};
228 Block* const next{is_last ? nullptr : &*next_it};
229 // Insert before the next block
230 Block* const block{label.block};
231 // Analyze instructions until it reaches an already visited block or there's a branch
232 bool is_branch{false};
233 while (!next || pc < next->begin) {
234 is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
235 if (is_branch) {
236 break;
237 }
238 ++pc;
239 }
240 if (!is_branch) {
241 // If the block finished without a branch, the next instruction has
242 // already been visited, so jump to it
243 block->end = pc;
244 block->cond = IR::Condition{true};
245 block->branch_true = next;
246 block->branch_false = nullptr;
247 }
248 // The function pointer might have been invalidated, so resolve it again
249 // Insert the new block
250 functions[function_id].blocks.insert(*block);
251}
252
253bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
254 const Location pc{label.address};
255 Function& function{functions[function_id]};
256 const auto it{
257 std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
258 if (it == function.blocks.end()) {
259 // Address has not been visited
260 return false;
261 }
262 Block* const visited_block{&*it};
263 if (visited_block->begin == pc) {
264 throw LogicError("Dangling block");
265 }
266 Block* const new_block{label.block};
267 Split(visited_block, new_block, pc);
268 function.blocks.insert(it, *new_block);
269 return true;
270}
271
272CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
273 const Instruction inst{env.ReadInstruction(pc.Offset())};
274 const Opcode opcode{Decode(inst.raw)};
275 switch (opcode) {
276 case Opcode::BRA:
277 case Opcode::JMP:
278 case Opcode::RET:
279 if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
280 return AnalysisState::Continue;
281 }
282 switch (opcode) {
283 case Opcode::BRA:
284 case Opcode::JMP:
285 AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
286 break;
287 case Opcode::RET:
288 block->end_class = EndClass::Return;
289 break;
290 default:
291 break;
292 }
293 block->end = pc;
294 return AnalysisState::Branch;
295 case Opcode::BRK:
296 case Opcode::CONT:
297 case Opcode::LONGJMP:
298 case Opcode::SYNC: {
299 if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
300 return AnalysisState::Continue;
301 }
302 const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
303 block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
304 block->end = pc;
305 return AnalysisState::Branch;
306 }
307 case Opcode::KIL: {
308 const Predicate pred{inst.Pred()};
309 const auto ir_pred{static_cast<IR::Pred>(pred.index)};
310 const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
311 AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
312 return AnalysisState::Branch;
313 }
314 case Opcode::PBK:
315 case Opcode::PCNT:
316 case Opcode::PEXIT:
317 case Opcode::PLONGJMP:
318 case Opcode::SSY:
319 block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
320 return AnalysisState::Continue;
321 case Opcode::BRX:
322 case Opcode::JMX:
323 return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
324 case Opcode::EXIT:
325 return AnalyzeEXIT(block, function_id, pc, inst);
326 case Opcode::PRET:
327 throw NotImplementedException("PRET flow analysis");
328 case Opcode::CAL:
329 case Opcode::JCAL: {
330 const bool is_absolute{IsAbsoluteJump(opcode)};
331 const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
332 // Technically CAL pushes into PRET, but that's implicit in the function call for us
333 // Insert the function into the list if it doesn't exist
334 const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
335 const bool exists{it != functions.end()};
336 const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
337 : functions.size()};
338 if (!exists) {
339 functions.emplace_back(block_pool, cal_pc);
340 }
341 block->end_class = EndClass::Call;
342 block->function_call = call_id;
343 block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
344 block->end = pc;
345 return AnalysisState::Branch;
346 }
347 default:
348 break;
349 }
350 const Predicate pred{inst.Pred()};
351 if (pred == Predicate{true} || pred == Predicate{false}) {
352 return AnalysisState::Continue;
353 }
354 const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
355 AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
356 return AnalysisState::Branch;
357}
358
359void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
360 EndClass insn_end_class, IR::Condition cond) {
361 if (block->begin != pc) {
362 // If the block doesn't start at the conditional instruction,
363 // mark it as a label to visit later
364 block->end = pc;
365 block->cond = IR::Condition{true};
366 block->branch_true = AddLabel(block, block->stack, pc, function_id);
367 block->branch_false = nullptr;
368 return;
369 }
370 // Create a virtual block and a conditional block
371 Block* const conditional_block{block_pool.Create()};
372 Block virtual_block{};
373 virtual_block.begin = block->begin.Virtual();
374 virtual_block.end = block->begin.Virtual();
375 virtual_block.end_class = EndClass::Branch;
376 virtual_block.stack = block->stack;
377 virtual_block.cond = cond;
378 virtual_block.branch_true = conditional_block;
379 virtual_block.branch_false = nullptr;
380 // Save the contents of the visited block in the conditional block
381 *conditional_block = std::move(*block);
382 // Impersonate the visited block with a virtual block
383 *block = std::move(virtual_block);
384 // Set the end properties of the conditional instruction
385 conditional_block->end = pc + 1;
386 conditional_block->end_class = insn_end_class;
387 // Add a label to the instruction after the conditional instruction
388 Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
389 // Branch to the next instruction from the virtual block
390 block->branch_false = endif_block;
391 // And branch to it from the conditional instruction if it is a branch or a kill instruction
392 // Kill instructions are considered a branch because they demote to a helper invocation and
393 // execution may continue.
394 if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
395 conditional_block->cond = IR::Condition{true};
396 conditional_block->branch_true = endif_block;
397 conditional_block->branch_false = nullptr;
398 }
399 // Finally insert the condition block into the list of blocks
400 functions[function_id].blocks.insert(*conditional_block);
401}
402
403bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
404 Opcode opcode) {
405 if (inst.branch.is_cbuf) {
406 throw NotImplementedException("Branch with constant buffer offset");
407 }
408 const Predicate pred{inst.Pred()};
409 if (pred == Predicate{false}) {
410 return false;
411 }
412 const bool has_flow_test{HasFlowTest(opcode)};
413 const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
414 if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
415 block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
416 block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
417 } else {
418 block->cond = IR::Condition{true};
419 }
420 return true;
421}
422
423void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
424 bool is_absolute) {
425 const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
426 block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
427}
428
429CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
430 FunctionId function_id) {
431 const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
432 if (!brx_table) {
433 TrackIndirectBranchTable(env, pc, program_start);
434 throw NotImplementedException("Failed to track indirect branch");
435 }
436 const IR::FlowTest flow_test{inst.branch.flow_test};
437 const Predicate pred{inst.Pred()};
438 if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
439 throw NotImplementedException("Conditional indirect branch");
440 }
441 std::vector<u32> targets;
442 targets.reserve(brx_table->num_entries);
443 for (u32 i = 0; i < brx_table->num_entries; ++i) {
444 u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
445 if (!is_absolute) {
446 target += pc.Offset();
447 }
448 target += static_cast<u32>(brx_table->branch_offset);
449 target += 8;
450 targets.push_back(target);
451 }
452 std::ranges::sort(targets);
453 targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
454
455 block->indirect_branches.reserve(targets.size());
456 for (const u32 target : targets) {
457 Block* const branch{AddLabel(block, block->stack, target, function_id)};
458 block->indirect_branches.push_back({
459 .block = branch,
460 .address = target,
461 });
462 }
463 block->cond = IR::Condition{true};
464 block->end = pc + 1;
465 block->end_class = EndClass::IndirectBranch;
466 block->branch_reg = brx_table->branch_reg;
467 block->branch_offset = brx_table->branch_offset + 8;
468 if (!is_absolute) {
469 block->branch_offset += pc.Offset();
470 }
471 return AnalysisState::Branch;
472}
473
474CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
475 Instruction inst) {
476 const IR::FlowTest flow_test{inst.branch.flow_test};
477 const Predicate pred{inst.Pred()};
478 if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
479 // EXIT will never be taken
480 return AnalysisState::Continue;
481 }
482 if (exits_to_dispatcher && function_id != 0) {
483 throw NotImplementedException("Dispatch EXIT on external function");
484 }
485 if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
486 if (block->stack.Peek(Token::PEXIT).has_value()) {
487 throw NotImplementedException("Conditional EXIT with PEXIT token");
488 }
489 const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
490 if (exits_to_dispatcher) {
491 block->end = pc;
492 block->end_class = EndClass::Branch;
493 block->cond = cond;
494 block->branch_true = dispatch_block;
495 block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
496 return AnalysisState::Branch;
497 }
498 AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
499 return AnalysisState::Branch;
500 }
501 if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
502 const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
503 block->cond = IR::Condition{true};
504 block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
505 block->branch_false = nullptr;
506 return AnalysisState::Branch;
507 }
508 if (exits_to_dispatcher) {
509 block->cond = IR::Condition{true};
510 block->end = pc;
511 block->end_class = EndClass::Branch;
512 block->branch_true = dispatch_block;
513 block->branch_false = nullptr;
514 return AnalysisState::Branch;
515 }
516 block->end = pc + 1;
517 block->end_class = EndClass::Exit;
518 return AnalysisState::Branch;
519}
520
521Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
522 Function& function{functions[function_id]};
523 if (block->begin == pc) {
524 // Jumps to itself
525 return block;
526 }
527 if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
528 // Block already exists and it has been visited
529 if (function.blocks.begin() != it) {
530 // Check if the previous node is the virtual variant of the label
531 // This won't exist if a virtual node is not needed or it hasn't been visited
532 // If it hasn't been visited and a virtual node is needed, this will still behave as
533 // expected because the node has been impersonated by its virtual node.
534 const auto prev{std::prev(it)};
535 if (it->begin.Virtual() == prev->begin) {
536 return &*prev;
537 }
538 }
539 return &*it;
540 }
541 // Make sure we don't insert the same label twice
542 const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
543 if (label_it != function.labels.end()) {
544 return label_it->block;
545 }
546 Block* const new_block{block_pool.Create()};
547 new_block->begin = pc;
548 new_block->end = pc;
549 new_block->end_class = EndClass::Branch;
550 new_block->cond = IR::Condition(true);
551 new_block->stack = stack;
552 new_block->branch_true = nullptr;
553 new_block->branch_false = nullptr;
554 function.labels.push_back(Label{
555 .address{pc},
556 .block = new_block,
557 .stack{std::move(stack)},
558 });
559 return new_block;
560}
561
562std::string CFG::Dot() const {
563 int node_uid{0};
564
565 std::string dot{"digraph shader {\n"};
566 for (const Function& function : functions) {
567 dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
568 dot += fmt::format("\t\tnode [style=filled];\n");
569 for (const Block& block : function.blocks) {
570 const std::string name{NameOf(block)};
571 const auto add_branch = [&](Block* branch, bool add_label) {
572 dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
573 if (add_label && block.cond != IR::Condition{true} &&
574 block.cond != IR::Condition{false}) {
575 dot += fmt::format(" [label=\"{}\"]", block.cond);
576 }
577 dot += '\n';
578 };
579 dot += fmt::format("\t\t{};\n", name);
580 switch (block.end_class) {
581 case EndClass::Branch:
582 if (block.cond != IR::Condition{false}) {
583 add_branch(block.branch_true, true);
584 }
585 if (block.cond != IR::Condition{true}) {
586 add_branch(block.branch_false, false);
587 }
588 break;
589 case EndClass::IndirectBranch:
590 for (const IndirectBranch& branch : block.indirect_branches) {
591 add_branch(branch.block, false);
592 }
593 break;
594 case EndClass::Call:
595 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
596 dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
597 dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=striped];\n",
598 node_uid, block.function_call);
599 dot += '\n';
600 ++node_uid;
601 break;
602 case EndClass::Exit:
603 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
604 dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=striped];\n",
605 node_uid);
606 ++node_uid;
607 break;
608 case EndClass::Return:
609 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
610 dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=striped];\n",
611 node_uid);
612 ++node_uid;
613 break;
614 case EndClass::Kill:
615 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
616 dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=striped];\n",
617 node_uid);
618 ++node_uid;
619 break;
620 }
621 }
622 if (function.entrypoint == 8) {
623 dot += fmt::format("\t\tlabel = \"main\";\n");
624 } else {
625 dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
626 }
627 dot += "\t}\n";
628 }
629 if (!functions.empty()) {
630 auto& function{functions.front()};
631 if (function.blocks.empty()) {
632 dot += "Start;\n";
633 } else {
634 dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
635 }
636 dot += fmt::format("\tStart [shape=diamond];\n");
637 }
638 dot += "}\n";
639 return dot;
640}
641
642} // namespace Shader::Maxwell::Flow
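
A short sketch (illustrative only, not part of the patch) of the value-semantics Stack above: Pop does not mutate the receiver, it returns the popped target together with a new stack unwound past that entry, mirroring how AnalyzeInst uses it. StackExample and the offsets are invented for this example.

#include <cassert>

#include "shader_recompiler/frontend/maxwell/control_flow.h"

static void StackExample() {
    using namespace Shader::Maxwell::Flow;
    Stack stack;
    stack.Push(Token::SSY, Shader::Maxwell::Location{0x28});
    stack.Push(Token::PBK, Shader::Maxwell::Location{0x48});

    // Peek finds the most recent entry with the matching token
    assert(stack.Peek(Token::SSY).has_value());

    // Pop returns the target plus a new stack unwound past that entry;
    // the original stack is left untouched
    const auto [target, popped]{stack.Pop(Token::SSY)};
    assert(target == Shader::Maxwell::Location{0x28});
    assert(stack.Peek(Token::SSY).has_value());
    assert(!popped.Peek(Token::SSY).has_value());
}
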
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <optional>
9#include <span>
10#include <string>
11#include <vector>
12
13#include <boost/container/small_vector.hpp>
14#include <boost/intrusive/set.hpp>
15
16#include "shader_recompiler/environment.h"
17#include "shader_recompiler/frontend/ir/condition.h"
18#include "shader_recompiler/frontend/maxwell/instruction.h"
19#include "shader_recompiler/frontend/maxwell/location.h"
20#include "shader_recompiler/frontend/maxwell/opcodes.h"
21#include "shader_recompiler/object_pool.h"
22
23namespace Shader::Maxwell::Flow {
24
25struct Block;
26
27using FunctionId = size_t;
28
29enum class EndClass {
30 Branch,
31 IndirectBranch,
32 Call,
33 Exit,
34 Return,
35 Kill,
36};
37
38enum class Token {
39 SSY,
40 PBK,
41 PEXIT,
42 PRET,
43 PCNT,
44 PLONGJMP,
45};
46
47struct StackEntry {
48 auto operator<=>(const StackEntry&) const noexcept = default;
49
50 Token token;
51 Location target;
52};
53
54class Stack {
55public:
56 void Push(Token token, Location target);
57 [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
58 [[nodiscard]] std::optional<Location> Peek(Token token) const;
59 [[nodiscard]] Stack Remove(Token token) const;
60
61private:
62 boost::container::small_vector<StackEntry, 3> entries;
63};
64
65struct IndirectBranch {
66 Block* block;
67 u32 address;
68};
69
70struct Block : boost::intrusive::set_base_hook<
71 // Normal link is ~2.5% faster compared to safe link
72 boost::intrusive::link_mode<boost::intrusive::normal_link>> {
73 [[nodiscard]] bool Contains(Location pc) const noexcept;
74
75 bool operator<(const Block& rhs) const noexcept {
76 return begin < rhs.begin;
77 }
78
79 Location begin;
80 Location end;
81 EndClass end_class{};
82 IR::Condition cond{};
83 Stack stack;
84 Block* branch_true{};
85 Block* branch_false{};
86 FunctionId function_call{};
87 Block* return_block{};
88 IR::Reg branch_reg{};
89 s32 branch_offset{};
90 std::vector<IndirectBranch> indirect_branches;
91};
92
93struct Label {
94 Location address;
95 Block* block;
96 Stack stack;
97};
98
99struct Function {
100 explicit Function(ObjectPool<Block>& block_pool, Location start_address);
101
102 Location entrypoint;
103 boost::container::small_vector<Label, 16> labels;
104 boost::intrusive::set<Block> blocks;
105};
106
107class CFG {
108 enum class AnalysisState {
109 Branch,
110 Continue,
111 };
112
113public:
114 explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
115 bool exits_to_dispatcher = false);
116
117 CFG& operator=(const CFG&) = delete;
118 CFG(const CFG&) = delete;
119
120 CFG& operator=(CFG&&) = delete;
121 CFG(CFG&&) = delete;
122
123 [[nodiscard]] std::string Dot() const;
124
125 [[nodiscard]] std::span<const Function> Functions() const noexcept {
126 return std::span(functions.data(), functions.size());
127 }
128 [[nodiscard]] std::span<Function> Functions() noexcept {
129 return std::span(functions.data(), functions.size());
130 }
131
132 [[nodiscard]] bool ExitsToDispatcher() const {
133 return exits_to_dispatcher;
134 }
135
136private:
137 void AnalyzeLabel(FunctionId function_id, Label& label);
138
139 /// Inspect already visited blocks.
140 /// Return true when the block has already been visited
141 bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
142
143 AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
144
145 void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
146 IR::Condition cond);
147
148 /// Return true when the branch instruction can actually be taken
149 bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
150 Opcode opcode);
151
152 void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
153 bool is_absolute);
154 AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
155 FunctionId function_id);
156 AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
157
158 /// Return the branch target block
159 Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
160
161 Environment& env;
162 ObjectPool<Block>& block_pool;
163 boost::container::small_vector<Function, 1> functions;
164 Location program_start;
165 bool exits_to_dispatcher{};
166 Block* dispatch_block{};
167};
168
169} // namespace Shader::Maxwell::Flow
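
To tie the pieces together, a hedged usage sketch (not part of the patch): Environment is the abstract interface from shader_recompiler/environment.h and a concrete implementation is assumed to come from the caller; DumpCfg is a name invented here, and start_address must be a multiple of 8.

#include <iostream>

#include "common/common_types.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"

// Builds the control flow graph for the program exposed by an Environment,
// then dumps it in Graphviz format for inspection.
static void DumpCfg(Shader::Environment& env, u32 start_address) {
    Shader::ObjectPool<Shader::Maxwell::Flow::Block> block_pool;
    Shader::Maxwell::Flow::CFG cfg{env, block_pool, Shader::Maxwell::Location{start_address}};
    std::cout << cfg.Dot();
    for (const auto& function : cfg.Functions()) {
        std::cout << "function at " << function.entrypoint.Offset() << " has "
                  << function.blocks.size() << " blocks\n";
    }
}
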
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <bit>
8#include <memory>
9#include <string_view>
10
11#include "common/common_types.h"
12#include "shader_recompiler/exception.h"
13#include "shader_recompiler/frontend/maxwell/decode.h"
14#include "shader_recompiler/frontend/maxwell/opcodes.h"
15
16namespace Shader::Maxwell {
17namespace {
18struct MaskValue {
19 u64 mask;
20 u64 value;
21};
22
23constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
24 u64 mask{};
25 u64 value{};
26 u64 bit{u64(1) << 63};
27 while (*encoding) {
28 switch (*encoding) {
29 case '0':
30 mask |= bit;
31 break;
32 case '1':
33 mask |= bit;
34 value |= bit;
35 break;
36 case '-':
37 break;
38 case ' ':
39 break;
40 default:
41 throw LogicError("Invalid encoding character '{}'", *encoding);
42 }
43 ++encoding;
44 if (*encoding != ' ') {
45 bit >>= 1;
46 }
47 }
48 return MaskValue{.mask = mask, .value = value};
49}
50
51struct InstEncoding {
52 MaskValue mask_value;
53 Opcode opcode;
54};
55constexpr std::array UNORDERED_ENCODINGS{
56#define INST(name, cute, encode) \
57 InstEncoding{ \
58 .mask_value{MaskValueFromEncoding(encode)}, \
59 .opcode = Opcode::name, \
60 },
61#include "maxwell.inc"
62#undef INST
63};
64
65constexpr auto SortedEncodings() {
66 std::array encodings{UNORDERED_ENCODINGS};
67 std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
68 return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
69 });
70 return encodings;
71}
72constexpr auto ENCODINGS{SortedEncodings()};
73
74constexpr int WidestLeftBits() {
75 int bits{64};
76 for (const InstEncoding& encoding : ENCODINGS) {
77 bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
78 }
79 return 64 - bits;
80}
81constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
82constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
83
84constexpr size_t ToFastLookupIndex(u64 value) {
85 return static_cast<size_t>(value >> MASK_SHIFT);
86}
87
88constexpr size_t FastLookupSize() {
89 size_t max_width{};
90 for (const InstEncoding& encoding : ENCODINGS) {
91 max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
92 }
93 return max_width + 1;
94}
95constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
96
97struct InstInfo {
98 [[nodiscard]] u64 Mask() const noexcept {
99 return static_cast<u64>(high_mask) << MASK_SHIFT;
100 }
101
102 [[nodiscard]] u64 Value() const noexcept {
103 return static_cast<u64>(high_value) << MASK_SHIFT;
104 }
105
106 u16 high_mask;
107 u16 high_value;
108 Opcode opcode;
109};
110
111constexpr auto MakeFastLookupTableIndex(size_t index) {
112 std::array<InstInfo, 2> encodings{};
113 size_t element{};
114 for (const auto& encoding : ENCODINGS) {
115 const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
116 const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
117 if ((index & mask) == value) {
118 encodings.at(element) = InstInfo{
119 .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
120 .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
121 .opcode = encoding.opcode,
122 };
123 ++element;
124 }
125 }
126 return encodings;
127}
128
129/*constexpr*/ auto MakeFastLookupTable() {
130 auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
131 for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
132 (*encodings)[index] = MakeFastLookupTableIndex(index);
133 }
134 return encodings;
135}
136const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
137} // Anonymous namespace
138
139Opcode Decode(u64 insn) {
140 const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
141 const auto it{std::ranges::find_if(
142 table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
143 if (it == table.end()) {
144 throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
145 }
146 return it->opcode;
147}
148
149} // namespace Shader::Maxwell
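
A worked example (added for illustration) of how the encoding strings in maxwell.inc turn into mask/value pairs in MaskValueFromEncoding above: '0' and '1' contribute to the mask, '-' leaves a wildcard bit, and scanning starts from bit 63. The constants below recompute the pair for the BRA pattern "1110 0010 0100 ----" by hand.

#include <cstdint>

// Only the 12 listed '0'/'1' bits are constrained, so the mask covers
// bits 63..52 and the value places 1110 0010 0100 there.
constexpr std::uint64_t BRA_MASK = 0xFFF0'0000'0000'0000ULL;
constexpr std::uint64_t BRA_VALUE = 0xE240'0000'0000'0000ULL;

// Any instruction word whose top bits match the value decodes as BRA,
// regardless of the wildcard ('-') bits below.
static_assert((0xE240'0000'0000'1234ULL & BRA_MASK) == BRA_VALUE);
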
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11
12[[nodiscard]] Opcode Decode(u64 insn);
13
14} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/decode.h"
10#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
11#include "shader_recompiler/frontend/maxwell/opcodes.h"
12#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
13
14namespace Shader::Maxwell {
15namespace {
16union Encoding {
17 u64 raw;
18 BitField<0, 8, IR::Reg> dest_reg;
19 BitField<8, 8, IR::Reg> src_reg;
20 BitField<20, 19, u64> immediate;
21 BitField<56, 1, u64> is_negative;
22 BitField<20, 24, s64> brx_offset;
23};
24
25template <typename Callable>
26std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
27 while (pos >= block_begin) {
28 const u64 insn{env.ReadInstruction(pos.Offset())};
29 --pos;
30 if (func(insn, Decode(insn))) {
31 return insn;
32 }
33 }
34 return std::nullopt;
35}
36
37std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
38 IR::Reg brx_reg) {
39 return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
40 const LDC::Encoding ldc{insn};
41 return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
42 ldc.mode == LDC::Mode::Default;
43 });
44}
45
46std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
47 IR::Reg ldc_reg) {
48 return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
49 const Encoding shl{insn};
50 return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
51 });
52}
53
54std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
55 IR::Reg shl_reg) {
56 return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
57 const Encoding imnmx{insn};
58 return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
59 });
60}
61} // Anonymous namespace
62
63std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
64 Location block_begin) {
65 const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
66 const Opcode brx_opcode{Decode(brx_insn)};
67 if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
68 throw LogicError("Tracked instruction is not BRX or JMX");
69 }
70 const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
71 const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
72
73 Location pos{brx_pos};
74 const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
75 if (!ldc_insn) {
76 return std::nullopt;
77 }
78 const LDC::Encoding ldc{*ldc_insn};
79 const u32 cbuf_index{static_cast<u32>(ldc.index)};
80 const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
81 const IR::Reg ldc_reg{ldc.src_reg};
82
83 const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
84 if (!shl_insn) {
85 return std::nullopt;
86 }
87 const Encoding shl{*shl_insn};
88 const IR::Reg shl_reg{shl.src_reg};
89
90 const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
91 if (!imnmx_insn) {
92 return std::nullopt;
93 }
94 const Encoding imnmx{*imnmx_insn};
95 if (imnmx.is_negative != 0) {
96 return std::nullopt;
97 }
98 const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
99 return IndirectBranchTableInfo{
100 .cbuf_index = cbuf_index,
101 .cbuf_offset = cbuf_offset,
102 .num_entries = imnmx_immediate + 1,
103 .branch_offset = brx_offset,
104 .branch_reg = brx_reg,
105 };
106}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/reg.h"
13#include "shader_recompiler/frontend/maxwell/location.h"
14
15namespace Shader::Maxwell {
16
17struct IndirectBranchTableInfo {
18 u32 cbuf_index{};
19 u32 cbuf_offset{};
20 u32 num_entries{};
21 s32 branch_offset{};
22 IR::Reg branch_reg{};
23};
24
25std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
26 Location block_begin);
27
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/flow_test.h"
10#include "shader_recompiler/frontend/ir/reg.h"
11
12namespace Shader::Maxwell {
13
14struct Predicate {
15 Predicate() = default;
16 Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
17 Predicate(bool value) : index{7}, negated{!value} {}
18 Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
19
20 unsigned index;
21 bool negated;
22};
23
24inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
25 return lhs.index == rhs.index && lhs.negated == rhs.negated;
26}
27
28inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
29 return !(lhs == rhs);
30}
31
32union Instruction {
33 Instruction(u64 raw_) : raw{raw_} {}
34
35 u64 raw;
36
37 union {
38 BitField<5, 1, u64> is_cbuf;
39 BitField<0, 5, IR::FlowTest> flow_test;
40
41 [[nodiscard]] u32 Absolute() const noexcept {
42 return static_cast<u32>(absolute);
43 }
44
45 [[nodiscard]] s32 Offset() const noexcept {
46 return static_cast<s32>(offset);
47 }
48
49 private:
50 BitField<20, 24, s64> offset;
51 BitField<20, 32, u64> absolute;
52 } branch;
53
54 [[nodiscard]] Predicate Pred() const noexcept {
55 return Predicate{pred};
56 }
57
58private:
59 BitField<16, 4, u64> pred;
60};
61static_assert(std::is_trivially_copyable_v<Instruction>);
62
63} // namespace Shader::Maxwell
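
A small sketch (not part of the patch) of how the Instruction and Predicate helpers above slice a raw opcode word; the constant is illustrative, not real shader bytes, and PredicateExample is a name invented here.

#include <cassert>

#include "shader_recompiler/frontend/maxwell/instruction.h"

static void PredicateExample() {
    using Shader::Maxwell::Instruction;
    using Shader::Maxwell::Predicate;

    // Bits 16-18 hold the predicate index and bit 19 the negation flag,
    // so 0xB in that field means "not P3".
    const Instruction inst{static_cast<u64>(0xB) << 16};
    const Predicate pred{inst.Pred()};
    assert(pred.index == 3);
    assert(pred.negated);

    // Predicate{true} is the always-true predicate PT (index 7, not negated)
    assert(pred != Predicate{true});
    assert(Predicate{true} == Predicate{7, false});
}
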
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <iterator>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "shader_recompiler/exception.h"
14
15namespace Shader::Maxwell {
16
17class Location {
18 static constexpr u32 VIRTUAL_BIAS{4};
19
20public:
21 constexpr Location() = default;
22
23 constexpr Location(u32 initial_offset) : offset{initial_offset} {
24 if (initial_offset % 8 != 0) {
25 throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
26 }
27 Align();
28 }
29
30 constexpr Location Virtual() const noexcept {
31 Location virtual_location;
32 virtual_location.offset = offset - VIRTUAL_BIAS;
33 return virtual_location;
34 }
35
36 [[nodiscard]] constexpr u32 Offset() const noexcept {
37 return offset;
38 }
39
40 [[nodiscard]] constexpr bool IsVirtual() const {
41 return offset % 8 == VIRTUAL_BIAS;
42 }
43
44 constexpr auto operator<=>(const Location&) const noexcept = default;
45
46 constexpr Location operator++() noexcept {
47 const Location copy{*this};
48 Step();
49 return copy;
50 }
51
52 constexpr Location operator++(int) noexcept {
53 Step();
54 return *this;
55 }
56
57 constexpr Location operator--() noexcept {
58 const Location copy{*this};
59 Back();
60 return copy;
61 }
62
63 constexpr Location operator--(int) noexcept {
64 Back();
65 return *this;
66 }
67
68 constexpr Location operator+(int number) const {
69 Location new_pc{*this};
70 while (number > 0) {
71 --number;
72 ++new_pc;
73 }
74 while (number < 0) {
75 ++number;
76 --new_pc;
77 }
78 return new_pc;
79 }
80
81 constexpr Location operator-(int number) const {
82 return operator+(-number);
83 }
84
85private:
86 constexpr void Align() {
87 offset += offset % 32 == 0 ? 8 : 0;
88 }
89
90 constexpr void Step() {
91 offset += 8 + (offset % 32 == 24 ? 8 : 0);
92 }
93
94 constexpr void Back() {
95 offset -= 8 + (offset % 32 == 8 ? 8 : 0);
96 }
97
98 u32 offset{0xcccccccc};
99};
100
101} // namespace Shader::Maxwell
102
103template <>
104struct fmt::formatter<Shader::Maxwell::Location> {
105 constexpr auto parse(format_parse_context& ctx) {
106 return ctx.begin();
107 }
108 template <typename FormatContext>
109 auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
110 return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
111 }
112};
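Location models Maxwell program counters: instructions are 8 bytes each, and every slot that falls on a 32-byte boundary holds a scheduling/control word rather than an executable instruction, which is why Align, Step and Back silently skip those offsets and why Virtual() biases the offset by 4 so it can never collide with a real instruction address. Note that the increment operators invert the usual C++ convention: prefix ++ returns the value from before the step, while postfix ++ returns the stepped value, so callers that use the returned Location should account for that. A minimal stepping sketch, with the offsets that follow from Step() shown as comments:

void ExampleWalk() {
    Location pc{0};         // Align() bumps 0x00 (a control word) to 0x08
    ++pc;                   // 0x10
    ++pc;                   // 0x18
    ++pc;                   // 0x20 is a control word, so Step() lands on 0x28
    fmt::print("{}\n", pc); // prints "0028" through the formatter above
}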
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5INST(AL2P, "AL2P", "1110 1111 1010 0---")
6INST(ALD, "ALD", "1110 1111 1101 1---")
7INST(AST, "AST", "1110 1111 1111 0---")
8INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
9INST(ATOM, "ATOM", "1110 1101 ---- ----")
10INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
11INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
12INST(B2R, "B2R", "1111 0000 1011 1---")
13INST(BAR, "BAR", "1111 0000 1010 1---")
14INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
15INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
16INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
17INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
18INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
19INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
20INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
21INST(BPT, "BPT", "1110 0011 1010 ----")
22INST(BRA, "BRA", "1110 0010 0100 ----")
23INST(BRK, "BRK", "1110 0011 0100 ----")
24INST(BRX, "BRX", "1110 0010 0101 ----")
25INST(CAL, "CAL", "1110 0010 0110 ----")
26INST(CCTL, "CCTL", "1110 1111 011- ----")
27INST(CCTLL, "CCTLL", "1110 1111 100- ----")
28INST(CONT, "CONT", "1110 0011 0101 ----")
29INST(CS2R, "CS2R", "0101 0000 1100 1---")
30INST(CSET, "CSET", "0101 0000 1001 1---")
31INST(CSETP, "CSETP", "0101 0000 1010 0---")
32INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
33INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
34INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
35INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
36INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
37INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
38INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
39INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
40INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
41INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
42INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
43INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
44INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
45INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
46INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
47INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
48INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
49INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
50INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
51INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
52INST(EXIT, "EXIT", "1110 0011 0000 ----")
53INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
54INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
55INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
56INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
57INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
58INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
59INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
60INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
61INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
62INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
63INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
64INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
65INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
66INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
67INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
68INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
69INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
70INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
71INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
72INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
73INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
74INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
75INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
76INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
77INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
78INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
79INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
80INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
81INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
82INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
83INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
84INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
85INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
86INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
87INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
88INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
89INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
90INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
91INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
92INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
93INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
94INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
95INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
96INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
97INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
98INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
99INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
100INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
101INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
102INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
103INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
104INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
105INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
106INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
107INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
108INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
109INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
110INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
111INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
112INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
113INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
114INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
115INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
116INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
117INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
118INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
119INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
120INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
121INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
122INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
123INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
124INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
125INST(IADD32I, "IADD32I", "0001 110- ---- ----")
126INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
127INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
128INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
129INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
130INST(IDE, "IDE", "1110 0011 1001 ----")
131INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
132INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
133INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
134INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
135INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
136INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
137INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
138INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
139INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
140INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
141INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
142INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
143INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
144INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
145INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
146INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
147INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
148INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
149INST(IPA, "IPA", "1110 0000 ---- ----")
150INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
151INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
152INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
153INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
154INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
155INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
156INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
157INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
158INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
159INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
160INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
161INST(JCAL, "JCAL", "1110 0010 0010 ----")
162INST(JMP, "JMP", "1110 0010 0001 ----")
163INST(JMX, "JMX", "1110 0010 0000 ----")
164INST(KIL, "KIL", "1110 0011 0011 ----")
165INST(LD, "LD", "100- ---- ---- ----")
166INST(LDC, "LDC", "1110 1111 1001 0---")
167INST(LDG, "LDG", "1110 1110 1101 0---")
168INST(LDL, "LDL", "1110 1111 0100 0---")
169INST(LDS, "LDS", "1110 1111 0100 1---")
170INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
171INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
172INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
173INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
174INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
175INST(LEPC, "LEPC", "0101 0000 1101 0---")
176INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
177INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
178INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
179INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
180INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
181INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
182INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
183INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
184INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
185INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
186INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
187INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
188INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
189INST(MUFU, "MUFU", "0101 0000 1000 0---")
190INST(NOP, "NOP", "0101 0000 1011 0---")
191INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
192INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
193INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
194INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
195INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
196INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
197INST(PBK, "PBK", "1110 0010 1010 ----")
198INST(PCNT, "PCNT", "1110 0010 1011 ----")
199INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
200INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
201INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
202INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
203INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
204INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
205INST(PRET, "PRET", "1110 0010 0111 ----")
206INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
207INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
208INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
209INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
210INST(PSET, "PSET", "0101 0000 1000 1---")
211INST(PSETP, "PSETP", "0101 0000 1001 0---")
212INST(R2B, "R2B", "1111 0000 1100 0---")
213INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
214INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
215INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
216INST(RAM, "RAM", "1110 0011 1000 ----")
217INST(RED, "RED", "1110 1011 1111 1---")
218INST(RET, "RET", "1110 0011 0010 ----")
219INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
220INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
221INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
222INST(RTT, "RTT", "1110 0011 0110 ----")
223INST(S2R, "S2R", "1111 0000 1100 1---")
224INST(SAM, "SAM", "1110 0011 0111 ----")
225INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
226INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
227INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
228INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
229INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
230INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
231INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
232INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
233INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
234INST(SHFL, "SHFL", "1110 1111 0001 0---")
235INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
236INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
237INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
238INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
239INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
240INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
241INST(SSY, "SSY", "1110 0010 1001 ----")
242INST(ST, "ST", "101- ---- ---- ----")
243INST(STG, "STG", "1110 1110 1101 1---")
244INST(STL, "STL", "1110 1111 0101 0---")
245INST(STP, "STP", "1110 1110 1010 0---")
246INST(STS, "STS", "1110 1111 0101 1---")
247INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
248INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
249INST(SULD, "SULD", "1110 1011 000- ----")
250INST(SURED, "SURED", "1110 1011 010- ----")
251INST(SUST, "SUST", "1110 1011 001- ----")
252INST(SYNC, "SYNC", "1111 0000 1111 1---")
253INST(TEX, "TEX", "1100 0--- ---- ----")
254INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
255INST(TEXS, "TEXS", "1101 -00- ---- ----")
256INST(TLD, "TLD", "1101 1100 ---- ----")
257INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
258INST(TLD4, "TLD4", "1100 10-- ---- ----")
259INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
260INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
261INST(TLDS, "TLDS", "1101 -01- ---- ----")
262INST(TMML, "TMML", "1101 1111 0101 1---")
263INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
264INST(TXA, "TXA", "1101 1111 0100 0---")
265INST(TXD, "TXD", "1101 1110 00-- ----")
266INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
267INST(TXQ, "TXQ", "1101 1111 0100 1---")
268INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
269INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
270INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
271INST(VADD, "VADD", "0010 00-- ---- ----")
272INST(VMAD, "VMAD", "0101 1111 ---- ----")
273INST(VMNMX, "VMNMX", "0011 101- ---- ----")
274INST(VOTE, "VOTE", "0101 0000 1101 1---")
275INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
276INST(VSET, "VSET", "0100 000- ---- ----")
277INST(VSETP, "VSETP", "0101 0000 1111 0---")
278INST(VSHL, "VSHL", "0101 0111 ---- ----")
279INST(VSHR, "VSHR", "0101 0110 ---- ----")
280INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
281INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
282INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
283INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
284
285// Removed: its encoding fixes a low bit (the trailing "0--0"), which would make the fast decode tables larger
286// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
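Each INST line above pairs an enumerator name, a display string, and a 16-character pattern for the top 16 bits of the 64-bit instruction word, where '0' and '1' are fixed bits and '-' marks a don't-care; the decoder (decode.cpp, not shown in this hunk) turns those patterns into mask/value pairs for its lookup tables. The sketch below shows one way to derive such a pair, assuming the first pattern character maps to bit 63 and spaces are ignored; FromEncoding is an illustrative helper, not the committed implementation, and u64 comes from common/common_types.h.

struct MaskValue {
    u64 mask;
    u64 value;
};

constexpr MaskValue FromEncoding(const char* encoding) {
    u64 mask{};
    u64 value{};
    u64 bit{u64{1} << 63};
    for (const char* c = encoding; *c != '\0'; ++c) {
        if (*c == ' ') {
            continue;               // Spaces are purely cosmetic
        }
        if (*c == '0' || *c == '1') {
            mask |= bit;            // Fixed bits participate in matching
            value |= (*c == '1') ? bit : u64{0};
        }
        bit >>= 1;                  // '-' bits stay out of the mask
    }
    return MaskValue{mask, value};
}

// A word matches an opcode when (raw & mask) == value, e.g. for BRA:
// constexpr MaskValue bra{FromEncoding("1110 0010 0100 ----")};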
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11namespace {
12constexpr std::array NAME_TABLE{
13#define INST(name, cute, encode) cute,
14#include "maxwell.inc"
15#undef INST
16};
17} // Anonymous namespace
18
19const char* NameOf(Opcode opcode) {
20 if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
21 throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
22 }
23 return NAME_TABLE[static_cast<size_t>(opcode)];
24}
25
26} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9namespace Shader::Maxwell {
10
11enum class Opcode {
12#define INST(name, cute, encode) name,
13#include "maxwell.inc"
14#undef INST
15};
16
17const char* NameOf(Opcode opcode);
18
19} // namespace Shader::Maxwell
20
21template <>
22struct fmt::formatter<Shader::Maxwell::Opcode> {
23 constexpr auto parse(format_parse_context& ctx) {
24 return ctx.begin();
25 }
26 template <typename FormatContext>
27 auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
28 return format_to(ctx.out(), "{}", NameOf(opcode));
29 }
30};
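opcodes.cpp and opcodes.h re-include maxwell.inc with different definitions of INST, once keeping only the enumerator name and once keeping only the display string, so the Opcode enum and NAME_TABLE stay in lockstep with the encoding list without duplicating it. A third expansion in the same style is sketched below; ENCODING_TABLE and EncodingOf are hypothetical and shown only to illustrate how the X-macro is meant to be reused (the real decode tables are built in decode.cpp, outside this hunk).

// Requires <array> and "shader_recompiler/frontend/maxwell/opcodes.h"
namespace Shader::Maxwell {
namespace {
constexpr std::array ENCODING_TABLE{
#define INST(name, cute, encode) encode,
#include "maxwell.inc"
#undef INST
};
} // Anonymous namespace

const char* EncodingOf(Opcode opcode) {
    return ENCODING_TABLE[static_cast<size_t>(opcode)];
}
} // namespace Shader::Maxwell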
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <string>
8#include <unordered_map>
9#include <utility>
10#include <vector>
11#include <version>
12
13#include <fmt/format.h>
14
15#include <boost/intrusive/list.hpp>
16
17#include "shader_recompiler/environment.h"
18#include "shader_recompiler/frontend/ir/basic_block.h"
19#include "shader_recompiler/frontend/ir/ir_emitter.h"
20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/object_pool.h"
24
25namespace Shader::Maxwell {
26namespace {
27struct Statement;
28
29// Use normal_link because we are not guaranteed to destroy the tree in order
30using ListBaseHook =
31 boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
32
33using Tree = boost::intrusive::list<Statement,
34 // Allow using Statement without a definition
35 boost::intrusive::base_hook<ListBaseHook>,
36 // Avoid linear complexity on splice, size is never called
37 boost::intrusive::constant_time_size<false>>;
38using Node = Tree::iterator;
39
40enum class StatementType {
41 Code,
42 Goto,
43 Label,
44 If,
45 Loop,
46 Break,
47 Return,
48 Kill,
49 Unreachable,
50 Function,
51 Identity,
52 Not,
53 Or,
54 SetVariable,
55 SetIndirectBranchVariable,
56 Variable,
57 IndirectBranchCond,
58};
59
60bool HasChildren(StatementType type) {
61 switch (type) {
62 case StatementType::If:
63 case StatementType::Loop:
64 case StatementType::Function:
65 return true;
66 default:
67 return false;
68 }
69}
70
71struct Goto {};
72struct Label {};
73struct If {};
74struct Loop {};
75struct Break {};
76struct Return {};
77struct Kill {};
78struct Unreachable {};
79struct FunctionTag {};
80struct Identity {};
81struct Not {};
82struct Or {};
83struct SetVariable {};
84struct SetIndirectBranchVariable {};
85struct Variable {};
86struct IndirectBranchCond {};
87
88#ifdef _MSC_VER
89#pragma warning(push)
90#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
91#endif
92struct Statement : ListBaseHook {
93 Statement(const Flow::Block* block_, Statement* up_)
94 : block{block_}, up{up_}, type{StatementType::Code} {}
95 Statement(Goto, Statement* cond_, Node label_, Statement* up_)
96 : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
97 Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
98 Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
99 : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
100 Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
101 : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
102 Statement(Break, Statement* cond_, Statement* up_)
103 : cond{cond_}, up{up_}, type{StatementType::Break} {}
104 Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
105 Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
106 Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
107 Statement(FunctionTag) : children{}, type{StatementType::Function} {}
108 Statement(Identity, IR::Condition cond_, Statement* up_)
109 : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
110 Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
111 Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
112 : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
113 Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
114 : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
115 Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
116 : branch_offset{branch_offset_},
117 branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
118 Statement(Variable, u32 id_, Statement* up_)
119 : id{id_}, up{up_}, type{StatementType::Variable} {}
120 Statement(IndirectBranchCond, u32 location_, Statement* up_)
121 : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}
122
123 ~Statement() {
124 if (HasChildren(type)) {
125 std::destroy_at(&children);
126 }
127 }
128
129 union {
130 const Flow::Block* block;
131 Node label;
132 Tree children;
133 IR::Condition guest_cond;
134 Statement* op;
135 Statement* op_a;
136 u32 location;
137 s32 branch_offset;
138 };
139 union {
140 Statement* cond;
141 Statement* op_b;
142 u32 id;
143 IR::Reg branch_reg;
144 };
145 Statement* up{};
146 StatementType type;
147};
148#ifdef _MSC_VER
149#pragma warning(pop)
150#endif
151
152std::string DumpExpr(const Statement* stmt) {
153 switch (stmt->type) {
154 case StatementType::Identity:
155 return fmt::format("{}", stmt->guest_cond);
156 case StatementType::Not:
157 return fmt::format("!{}", DumpExpr(stmt->op));
158 case StatementType::Or:
159 return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
160 case StatementType::Variable:
161 return fmt::format("goto_L{}", stmt->id);
162 case StatementType::IndirectBranchCond:
163 return fmt::format("(indirect_branch == {:x})", stmt->location);
164 default:
165 return "<invalid type>";
166 }
167}
168
169[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
170 std::string ret;
171 std::string indent(indentation, ' ');
172 for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
173 switch (stmt->type) {
174 case StatementType::Code:
175 ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
176 stmt->block->begin.Offset(), stmt->block->end.Offset(),
177 reinterpret_cast<uintptr_t>(stmt->block));
178 break;
179 case StatementType::Goto:
180 ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
181 stmt->label->id);
182 break;
183 case StatementType::Label:
184 ret += fmt::format("{}L{}:\n", indent, stmt->id);
185 break;
186 case StatementType::If:
187 ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
188 ret += DumpTree(stmt->children, indentation + 4);
189 ret += fmt::format("{} }}\n", indent);
190 break;
191 case StatementType::Loop:
192 ret += fmt::format("{} do {{\n", indent);
193 ret += DumpTree(stmt->children, indentation + 4);
194 ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
195 break;
196 case StatementType::Break:
197 ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
198 break;
199 case StatementType::Return:
200 ret += fmt::format("{} return;\n", indent);
201 break;
202 case StatementType::Kill:
203 ret += fmt::format("{} kill;\n", indent);
204 break;
205 case StatementType::Unreachable:
206 ret += fmt::format("{} unreachable;\n", indent);
207 break;
208 case StatementType::SetVariable:
209 ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
210 break;
211 case StatementType::SetIndirectBranchVariable:
212 ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
213 stmt->branch_offset);
214 break;
215 case StatementType::Function:
216 case StatementType::Identity:
217 case StatementType::Not:
218 case StatementType::Or:
219 case StatementType::Variable:
220 case StatementType::IndirectBranchCond:
221 throw LogicError("Statement can't be printed");
222 }
223 }
224 return ret;
225}
226
227void SanitizeNoBreaks(const Tree& tree) {
228 if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
229 throw NotImplementedException("Capturing statement with break nodes");
230 }
231}
232
233size_t Level(Node stmt) {
234 size_t level{0};
235 Statement* node{stmt->up};
236 while (node) {
237 ++level;
238 node = node->up;
239 }
240 return level;
241}
242
243bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
244 const size_t goto_level{Level(goto_stmt)};
245 const size_t label_level{Level(label_stmt)};
246 size_t min_level;
247 size_t max_level;
248 Node min;
249 Node max;
250 if (label_level < goto_level) {
251 min_level = label_level;
252 max_level = goto_level;
253 min = label_stmt;
254 max = goto_stmt;
255 } else { // goto_level < label_level
256 min_level = goto_level;
257 max_level = label_level;
258 min = goto_stmt;
259 max = label_stmt;
260 }
261 while (max_level > min_level) {
262 --max_level;
263 max = max->up;
264 }
265 return min->up == max->up;
266}
267
268bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
269 return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
270}
271
272[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
273 Node it{goto_stmt};
274 do {
275 if (it == label_stmt) {
276 return true;
277 }
278 --it;
279 } while (it != goto_stmt->up->children.begin());
280 while (it != goto_stmt->up->children.end()) {
281 if (it == label_stmt) {
282 return true;
283 }
284 ++it;
285 }
286 return false;
287}
288
289Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
290 Statement* const parent{uncle->up};
291 Statement* it{&*nephew};
292 while (it->up != parent) {
293 it = it->up;
294 }
295 return Tree::s_iterator_to(*it);
296}
297
298bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
299 const Node end{right_sibling->up->children.end()};
300 for (auto it = right_sibling; it != end; ++it) {
301 if (it == left_sibling) {
302 return false;
303 }
304 }
305 return true;
306}
307
308bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
309 const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
310 return AreOrdered(sibling, goto_stmt);
311}
312
313class GotoPass {
314public:
315 explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
316 std::vector gotos{BuildTree(cfg)};
317 const auto end{gotos.rend()};
318 for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
319 RemoveGoto(*goto_stmt);
320 }
321 }
322
323 Statement& RootStatement() noexcept {
324 return root_stmt;
325 }
326
327private:
328 void RemoveGoto(Node goto_stmt) {
329 // Force goto_stmt and label_stmt to be directly related
330 const Node label_stmt{goto_stmt->label};
331 if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
332 // Move goto_stmt out using outward-movement transformation until it becomes
333 // directly related to label_stmt
334 while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
335 goto_stmt = MoveOutward(goto_stmt);
336 }
337 }
338 // Force goto_stmt and label_stmt to be siblings
339 if (IsDirectlyRelated(goto_stmt, label_stmt)) {
340 const size_t label_level{Level(label_stmt)};
341 size_t goto_level{Level(goto_stmt)};
342 if (goto_level > label_level) {
343 // Move goto_stmt out of its level using outward-movement transformations
344 while (goto_level > label_level) {
345 goto_stmt = MoveOutward(goto_stmt);
346 --goto_level;
347 }
348 } else { // Level(goto_stmt) < Level(label_stmt)
349 if (NeedsLift(goto_stmt, label_stmt)) {
350 // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
351 // transformations
352 goto_stmt = Lift(goto_stmt);
353 }
354 // Move goto_stmt into label_stmt's level using inward-movement transformation
355 while (goto_level < label_level) {
356 goto_stmt = MoveInward(goto_stmt);
357 ++goto_level;
358 }
359 }
360 }
361 // Expensive operation:
362 // if (!AreSiblings(goto_stmt, label_stmt)) {
363 // throw LogicError("Goto is not a sibling with the label");
364 // }
365 // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
366 if (std::next(goto_stmt) == label_stmt) {
367 // Simply eliminate the goto if the label is next to it
368 goto_stmt->up->children.erase(goto_stmt);
369 } else if (AreOrdered(goto_stmt, label_stmt)) {
370 // Eliminate goto_stmt with a conditional
371 EliminateAsConditional(goto_stmt, label_stmt);
372 } else {
373 // Eliminate goto_stmt with a loop
374 EliminateAsLoop(goto_stmt, label_stmt);
375 }
376 }
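
    // Worked illustration (not part of this commit) of the two terminal
    // transformations applied above once goto_stmt and label_stmt are siblings:
    //
    //   Forward goto (AreOrdered is true), removed by EliminateAsConditional:
    //     if (c) goto L;          if (!c) {
    //     stmt_a;          =>         stmt_a;
    //     stmt_b;                     stmt_b;
    //   L:                        }
    //
    //   Backward goto, removed by EliminateAsLoop:
    //   L:                        do {
    //     stmt_a;          =>         stmt_a;
    //     stmt_b;                     stmt_b;
    //     if (c) goto L;          } while (c);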
377
378 std::vector<Node> BuildTree(Flow::CFG& cfg) {
379 u32 label_id{0};
380 std::vector<Node> gotos;
381 Flow::Function& first_function{cfg.Functions().front()};
382 BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
383 return gotos;
384 }
385
386 void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
387 std::vector<Node>& gotos, Node function_insert_point,
388 std::optional<Node> return_label) {
389 Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
390 Tree& root{root_stmt.children};
391 std::unordered_map<Flow::Block*, Node> local_labels;
392 local_labels.reserve(function.blocks.size());
393
394 for (Flow::Block& block : function.blocks) {
395 Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
396 const Node label_it{root.insert(function_insert_point, *label)};
397 local_labels.emplace(&block, label_it);
398 ++label_id;
399 }
400 for (Flow::Block& block : function.blocks) {
401 const Node label{local_labels.at(&block)};
402 // Insertion point
403 const Node ip{std::next(label)};
404
405 // Reset goto variables before the first block and after its respective label
406 const auto make_reset_variable{[&]() -> Statement& {
407 return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
408 }};
409 root.push_front(make_reset_variable());
410 root.insert(ip, make_reset_variable());
411 root.insert(ip, *pool.Create(&block, &root_stmt));
412
413 switch (block.end_class) {
414 case Flow::EndClass::Branch: {
415 Statement* const always_cond{
416 pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
417 if (block.cond == IR::Condition{true}) {
418 const Node true_label{local_labels.at(block.branch_true)};
419 gotos.push_back(
420 root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
421 } else if (block.cond == IR::Condition{false}) {
422 const Node false_label{local_labels.at(block.branch_false)};
423 gotos.push_back(root.insert(
424 ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
425 } else {
426 const Node true_label{local_labels.at(block.branch_true)};
427 const Node false_label{local_labels.at(block.branch_false)};
428 Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
429 gotos.push_back(
430 root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
431 gotos.push_back(root.insert(
432 ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
433 }
434 break;
435 }
436 case Flow::EndClass::IndirectBranch:
437 root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
438 block.branch_offset, &root_stmt));
439 for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
440 const Node indirect_label{local_labels.at(indirect.block)};
441 Statement* cond{
442 pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
443 Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
444 gotos.push_back(root.insert(ip, *goto_stmt));
445 }
446 root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
447 break;
448 case Flow::EndClass::Call: {
449 Flow::Function& call{cfg.Functions()[block.function_call]};
450 const Node call_return_label{local_labels.at(block.return_block)};
451 BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
452 break;
453 }
454 case Flow::EndClass::Exit:
455 root.insert(ip, *pool.Create(Return{}, &root_stmt));
456 break;
457 case Flow::EndClass::Return: {
458 Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
459 auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
460 gotos.push_back(root.insert(ip, *goto_stmt));
461 break;
462 }
463 case Flow::EndClass::Kill:
464 root.insert(ip, *pool.Create(Kill{}, &root_stmt));
465 break;
466 }
467 }
468 }
469
470 void UpdateTreeUp(Statement* tree) {
471 for (Statement& stmt : tree->children) {
472 stmt.up = tree;
473 }
474 }
475
476 void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
477 Tree& body{goto_stmt->up->children};
478 Tree if_body;
479 if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
480 Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
481 Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
482 UpdateTreeUp(if_stmt);
483 body.insert(goto_stmt, *if_stmt);
484 body.erase(goto_stmt);
485 }
486
487 void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
488 Tree& body{goto_stmt->up->children};
489 Tree loop_body;
490 loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
491 Statement* const cond{goto_stmt->cond};
492 Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
493 UpdateTreeUp(loop);
494 body.insert(goto_stmt, *loop);
495 body.erase(goto_stmt);
496 }
497
498 [[nodiscard]] Node MoveOutward(Node goto_stmt) {
499 switch (goto_stmt->up->type) {
500 case StatementType::If:
501 return MoveOutwardIf(goto_stmt);
502 case StatementType::Loop:
503 return MoveOutwardLoop(goto_stmt);
504 default:
505 throw LogicError("Invalid outward movement");
506 }
507 }
508
509 [[nodiscard]] Node MoveInward(Node goto_stmt) {
510 Statement* const parent{goto_stmt->up};
511 Tree& body{parent->children};
512 const Node label{goto_stmt->label};
513 const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
514 const u32 label_id{label->id};
515
516 Statement* const goto_cond{goto_stmt->cond};
517 Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
518 body.insert(goto_stmt, *set_var);
519
520 Tree if_body;
521 if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
522 Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
523 Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
524 if (!if_body.empty()) {
525 Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
526 UpdateTreeUp(if_stmt);
527 body.insert(goto_stmt, *if_stmt);
528 }
529 body.erase(goto_stmt);
530
531 switch (label_nested_stmt->type) {
532 case StatementType::If:
533 // Update nested if condition
534 label_nested_stmt->cond =
535 pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
536 break;
537 case StatementType::Loop:
538 break;
539 default:
540 throw LogicError("Invalid inward movement");
541 }
542 Tree& nested_tree{label_nested_stmt->children};
543 Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
544 return nested_tree.insert(nested_tree.begin(), *new_goto);
545 }
546
547 [[nodiscard]] Node Lift(Node goto_stmt) {
548 Statement* const parent{goto_stmt->up};
549 Tree& body{parent->children};
550 const Node label{goto_stmt->label};
551 const u32 label_id{label->id};
552 const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
553
554 Tree loop_body;
555 loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
556 SanitizeNoBreaks(loop_body);
557 Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
558 Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
559 UpdateTreeUp(loop_stmt);
560 body.insert(goto_stmt, *loop_stmt);
561
562 Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
563 loop_stmt->children.push_front(*new_goto);
564 const Node new_goto_node{loop_stmt->children.begin()};
565
566 Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
567 loop_stmt->children.push_back(*set_var);
568
569 body.erase(goto_stmt);
570 return new_goto_node;
571 }
572
573 Node MoveOutwardIf(Node goto_stmt) {
574 const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
575 Tree& body{parent->children};
576 const u32 label_id{goto_stmt->label->id};
577 Statement* const goto_cond{goto_stmt->cond};
578 Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
579 body.insert(goto_stmt, *set_goto_var);
580
581 Tree if_body;
582 if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
583 if_body.pop_front();
584 Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
585 Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
586 Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
587 UpdateTreeUp(if_stmt);
588 body.insert(goto_stmt, *if_stmt);
589
590 body.erase(goto_stmt);
591
592 Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
593 Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
594 Tree& parent_tree{parent->up->children};
595 return parent_tree.insert(std::next(parent), *new_goto);
596 }
597
598 Node MoveOutwardLoop(Node goto_stmt) {
599 Statement* const parent{goto_stmt->up};
600 Tree& body{parent->children};
601 const u32 label_id{goto_stmt->label->id};
602 Statement* const goto_cond{goto_stmt->cond};
603 Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
604 Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
605 Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
606 body.insert(goto_stmt, *set_goto_var);
607 body.insert(goto_stmt, *break_stmt);
608 body.erase(goto_stmt);
609
610 const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
611 Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
612 Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
613 Tree& parent_tree{loop->up->children};
614 return parent_tree.insert(std::next(loop), *new_goto);
615 }
616
617 ObjectPool<Statement>& pool;
618 Statement root_stmt{FunctionTag{}};
619};
620
621[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
622 Tree& tree{stmt.up->children};
623 const Node end{tree.end()};
624 Node forward_node{std::next(Tree::s_iterator_to(stmt))};
625 while (forward_node != end && !HasChildren(forward_node->type)) {
626 if (forward_node->type == StatementType::Code) {
627 return &*forward_node;
628 }
629 ++forward_node;
630 }
631 return nullptr;
632}
633
634[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
635 switch (stmt.type) {
636 case StatementType::Identity:
637 return ir.Condition(stmt.guest_cond);
638 case StatementType::Not:
639 return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
640 case StatementType::Or:
641 return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
642 case StatementType::Variable:
643 return ir.GetGotoVariable(stmt.id);
644 case StatementType::IndirectBranchCond:
645 return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
646 default:
647 throw NotImplementedException("Statement type {}", stmt.type);
648 }
649}
650
651class TranslatePass {
652public:
653 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
654 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
655 IR::AbstractSyntaxList& syntax_list_)
656 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
657 syntax_list{syntax_list_} {
658 Visit(root_stmt, nullptr, nullptr);
659
660 IR::Block& first_block{*syntax_list.front().data.block};
661 IR::IREmitter ir(first_block, first_block.begin());
662 ir.Prologue();
663 }
664
665private:
666 void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
667 IR::Block* current_block{};
668 const auto ensure_block{[&] {
669 if (current_block) {
670 return;
671 }
672 current_block = block_pool.Create(inst_pool);
673 auto& node{syntax_list.emplace_back()};
674 node.type = IR::AbstractSyntaxNode::Type::Block;
675 node.data.block = current_block;
676 }};
677 Tree& tree{parent.children};
678 for (auto it = tree.begin(); it != tree.end(); ++it) {
679 Statement& stmt{*it};
680 switch (stmt.type) {
681 case StatementType::Label:
682 // Labels can be ignored
683 break;
684 case StatementType::Code: {
685 ensure_block();
686 Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
687 break;
688 }
689 case StatementType::SetVariable: {
690 ensure_block();
691 IR::IREmitter ir{*current_block};
692 ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
693 break;
694 }
695 case StatementType::SetIndirectBranchVariable: {
696 ensure_block();
697 IR::IREmitter ir{*current_block};
698 IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
699 ir.SetIndirectBranchVariable(address);
700 break;
701 }
702 case StatementType::If: {
703 ensure_block();
704 IR::Block* const merge_block{MergeBlock(parent, stmt)};
705
706 // Implement if header block
707 IR::IREmitter ir{*current_block};
708 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
709
710 const size_t if_node_index{syntax_list.size()};
711 syntax_list.emplace_back();
712
713 // Visit children
714 const size_t then_block_index{syntax_list.size()};
715 Visit(stmt, break_block, merge_block);
716
717 IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
718 current_block->AddBranch(then_block);
719 current_block->AddBranch(merge_block);
720 current_block = merge_block;
721
722 auto& if_node{syntax_list[if_node_index]};
723 if_node.type = IR::AbstractSyntaxNode::Type::If;
724 if_node.data.if_node.cond = cond;
725 if_node.data.if_node.body = then_block;
726 if_node.data.if_node.merge = merge_block;
727
728 auto& endif_node{syntax_list.emplace_back()};
729 endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
730 endif_node.data.end_if.merge = merge_block;
731
732 auto& merge{syntax_list.emplace_back()};
733 merge.type = IR::AbstractSyntaxNode::Type::Block;
734 merge.data.block = merge_block;
735 break;
736 }
737 case StatementType::Loop: {
738 IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
739 if (current_block) {
740 current_block->AddBranch(loop_header_block);
741 }
742 auto& header_node{syntax_list.emplace_back()};
743 header_node.type = IR::AbstractSyntaxNode::Type::Block;
744 header_node.data.block = loop_header_block;
745
746 IR::Block* const continue_block{block_pool.Create(inst_pool)};
747 IR::Block* const merge_block{MergeBlock(parent, stmt)};
748
749 const size_t loop_node_index{syntax_list.size()};
750 syntax_list.emplace_back();
751
752 // Visit children
753 const size_t body_block_index{syntax_list.size()};
754 Visit(stmt, merge_block, continue_block);
755
756 // The continue block is located at the end of the loop
757 IR::IREmitter ir{*continue_block};
758 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
759
760 IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
761 loop_header_block->AddBranch(body_block);
762
763 continue_block->AddBranch(loop_header_block);
764 continue_block->AddBranch(merge_block);
765
766 current_block = merge_block;
767
768 auto& loop{syntax_list[loop_node_index]};
769 loop.type = IR::AbstractSyntaxNode::Type::Loop;
770 loop.data.loop.body = body_block;
771 loop.data.loop.continue_block = continue_block;
772 loop.data.loop.merge = merge_block;
773
774 auto& continue_block_node{syntax_list.emplace_back()};
775 continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
776 continue_block_node.data.block = continue_block;
777
778 auto& repeat{syntax_list.emplace_back()};
779 repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
780 repeat.data.repeat.cond = cond;
781 repeat.data.repeat.loop_header = loop_header_block;
782 repeat.data.repeat.merge = merge_block;
783
784 auto& merge{syntax_list.emplace_back()};
785 merge.type = IR::AbstractSyntaxNode::Type::Block;
786 merge.data.block = merge_block;
787 break;
788 }
789 case StatementType::Break: {
790 ensure_block();
791 IR::Block* const skip_block{MergeBlock(parent, stmt)};
792
793 IR::IREmitter ir{*current_block};
794 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
795 current_block->AddBranch(break_block);
796 current_block->AddBranch(skip_block);
797 current_block = skip_block;
798
799 auto& break_node{syntax_list.emplace_back()};
800 break_node.type = IR::AbstractSyntaxNode::Type::Break;
801 break_node.data.break_node.cond = cond;
802 break_node.data.break_node.merge = break_block;
803 break_node.data.break_node.skip = skip_block;
804
805 auto& merge{syntax_list.emplace_back()};
806 merge.type = IR::AbstractSyntaxNode::Type::Block;
807 merge.data.block = skip_block;
808 break;
809 }
810 case StatementType::Return: {
811 ensure_block();
812 IR::IREmitter{*current_block}.Epilogue();
813 current_block = nullptr;
814 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
815 break;
816 }
817 case StatementType::Kill: {
818 ensure_block();
819 IR::Block* demote_block{MergeBlock(parent, stmt)};
820 IR::IREmitter{*current_block}.DemoteToHelperInvocation();
821 current_block->AddBranch(demote_block);
822 current_block = demote_block;
823
824 auto& merge{syntax_list.emplace_back()};
825 merge.type = IR::AbstractSyntaxNode::Type::Block;
826 merge.data.block = demote_block;
827 break;
828 }
829 case StatementType::Unreachable: {
830 ensure_block();
831 current_block = nullptr;
832 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
833 break;
834 }
835 default:
836 throw NotImplementedException("Statement type {}", stmt.type);
837 }
838 }
839 if (current_block) {
840 if (fallthrough_block) {
841 current_block->AddBranch(fallthrough_block);
842 } else {
843 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
844 }
845 }
846 }
847
848 IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
849 Statement* merge_stmt{TryFindForwardBlock(stmt)};
850 if (!merge_stmt) {
851 // Create a merge block we can visit later
852 merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
853 parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
854 }
855 return block_pool.Create(inst_pool);
856 }
857
858 ObjectPool<Statement>& stmt_pool;
859 ObjectPool<IR::Inst>& inst_pool;
860 ObjectPool<IR::Block>& block_pool;
861 Environment& env;
862 IR::AbstractSyntaxList& syntax_list;
863
864// TODO: C++20 Remove this when all compilers support constexpr std::vector
865#if __cpp_lib_constexpr_vector >= 201907
866 static constexpr Flow::Block dummy_flow_block;
867#else
868 const Flow::Block dummy_flow_block;
869#endif
870};
871} // Anonymous namespace
872
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) {
875 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
880 return syntax_list;
881}
882
883} // namespace Shader::Maxwell
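BuildASL is the public entry point that ties the two passes together: GotoPass rewrites the control flow graph's gotos into structured statements, and TranslatePass then walks that statement tree, translating every Code statement into an IR block and recording If/EndIf, Loop/Repeat, Break, Return and Unreachable markers in the abstract syntax list. A small sketch of inspecting the result follows; DumpSyntaxList is a hypothetical helper that only relies on the node fields already used above, and AbstractSyntaxList is assumed to be an iterable sequence of AbstractSyntaxNode. The pools passed to BuildASL must outlive the returned list, since its Block nodes point into block_pool.

// Sketch only: walking the list produced by BuildASL.
void DumpSyntaxList(const IR::AbstractSyntaxList& syntax_list) {
    for (const IR::AbstractSyntaxNode& node : syntax_list) {
        switch (node.type) {
        case IR::AbstractSyntaxNode::Type::Block:
            // node.data.block holds translated Maxwell instructions as IR
            break;
        case IR::AbstractSyntaxNode::Type::If:
            // node.data.if_node.{cond, body, merge} describe a structured if
            break;
        case IR::AbstractSyntaxNode::Type::Loop:
            // node.data.loop.{body, continue_block, merge} describe a structured loop
            break;
        default:
            break;
        }
    }
}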
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h"
13
14namespace Shader::Maxwell {
15
16[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg);
19
20} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21 SAFEADD,
22};
23
24enum class AtomSize : u64 {
25 U32,
26 S32,
27 U64,
28 F32,
29 F16x2,
30 S64,
31};
32
33IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
34 AtomOp op, bool is_signed) {
35 switch (op) {
36 case AtomOp::ADD:
37 return ir.GlobalAtomicIAdd(offset, op_b);
38 case AtomOp::MIN:
39 return ir.GlobalAtomicIMin(offset, op_b, is_signed);
40 case AtomOp::MAX:
41 return ir.GlobalAtomicIMax(offset, op_b, is_signed);
42 case AtomOp::INC:
43 return ir.GlobalAtomicInc(offset, op_b);
44 case AtomOp::DEC:
45 return ir.GlobalAtomicDec(offset, op_b);
46 case AtomOp::AND:
47 return ir.GlobalAtomicAnd(offset, op_b);
48 case AtomOp::OR:
49 return ir.GlobalAtomicOr(offset, op_b);
50 case AtomOp::XOR:
51 return ir.GlobalAtomicXor(offset, op_b);
52 case AtomOp::EXCH:
53 return ir.GlobalAtomicExchange(offset, op_b);
54 default:
55 throw NotImplementedException("Integer Atom Operation {}", op);
56 }
57}
58
59IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
60 AtomSize size) {
61 static constexpr IR::FpControl f16_control{
62 .no_contraction = false,
63 .rounding = IR::FpRounding::RN,
64 .fmz_mode = IR::FmzMode::DontCare,
65 };
66 static constexpr IR::FpControl f32_control{
67 .no_contraction = false,
68 .rounding = IR::FpRounding::RN,
69 .fmz_mode = IR::FmzMode::FTZ,
70 };
71 switch (op) {
72 case AtomOp::ADD:
73 return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
74 : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
75 case AtomOp::MIN:
76 return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
77 case AtomOp::MAX:
78 return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
79 default:
80 throw NotImplementedException("FP Atom Operation {}", op);
81 }
82}
83
84IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
85 union {
86 u64 raw;
87 BitField<8, 8, IR::Reg> addr_reg;
88 BitField<28, 20, s64> addr_offset;
89 BitField<28, 20, u64> rz_addr_offset;
90 BitField<48, 1, u64> e;
91 } const mem{insn};
92
93 const IR::U64 address{[&]() -> IR::U64 {
94 if (mem.e == 0) {
95 return v.ir.UConvert(64, v.X(mem.addr_reg));
96 }
97 return v.L(mem.addr_reg);
98 }()};
99 const u64 addr_offset{[&]() -> u64 {
100 if (mem.addr_reg == IR::Reg::RZ) {
101 // When RZ is used, the address is an absolute address
102 return static_cast<u64>(mem.rz_addr_offset.Value());
103 } else {
104 return static_cast<u64>(mem.addr_offset.Value());
105 }
106 }()};
107 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
108}
109
110bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
111 // TODO: SAFEADD
112 switch (size) {
113 case AtomSize::S32:
114 case AtomSize::U64:
115 return (op == AtomOp::INC || op == AtomOp::DEC);
116 case AtomSize::S64:
117 return !(op == AtomOp::MIN || op == AtomOp::MAX);
118 case AtomSize::F32:
119 return op != AtomOp::ADD;
120 case AtomSize::F16x2:
121 return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
122 default:
123 return false;
124 }
125}
126
127IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
128 switch (size) {
129 case AtomSize::U32:
130 case AtomSize::S32:
131 case AtomSize::F32:
132 case AtomSize::F16x2:
133 return ir.LoadGlobal32(offset);
134 case AtomSize::U64:
135 case AtomSize::S64:
136 return ir.PackUint2x32(ir.LoadGlobal64(offset));
137 default:
138 throw NotImplementedException("Atom Size {}", size);
139 }
140}
141
142void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
143 switch (size) {
144 case AtomSize::U32:
145 case AtomSize::S32:
146 case AtomSize::F16x2:
147 return v.X(dest_reg, IR::U32{result});
148 case AtomSize::U64:
149 case AtomSize::S64:
150 return v.L(dest_reg, IR::U64{result});
151 case AtomSize::F32:
152 return v.F(dest_reg, IR::F32{result});
153 default:
154 break;
155 }
156}
157
158IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
159 AtomSize size, AtomOp op) {
160 switch (size) {
161 case AtomSize::U32:
162 case AtomSize::S32:
163 return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
164 case AtomSize::U64:
165 case AtomSize::S64:
166 return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
167 case AtomSize::F32:
168 return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
169 case AtomSize::F16x2: {
170 return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
171 }
172 default:
173 throw NotImplementedException("Atom Size {}", size);
174 }
175}
176
177void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
178 const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
179 IR::Value result;
180 if (AtomOpNotApplicable(size, op)) {
181 result = LoadGlobal(v.ir, offset, size);
182 } else {
183 result = ApplyAtomOp(v, operand_reg, offset, size, op);
184 }
185 if (write_dest) {
186 StoreResult(v, dest_reg, result, size);
187 }
188}
189} // Anonymous namespace
190
191void TranslatorVisitor::ATOM(u64 insn) {
192 union {
193 u64 raw;
194 BitField<0, 8, IR::Reg> dest_reg;
195 BitField<20, 8, IR::Reg> operand_reg;
196 BitField<49, 3, AtomSize> size;
197 BitField<52, 4, AtomOp> op;
198 } const atom{insn};
199 const IR::U64 offset{AtomOffset(*this, insn)};
200 GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
201}
202
203void TranslatorVisitor::RED(u64 insn) {
204 union {
205 u64 raw;
206 BitField<0, 8, IR::Reg> operand_reg;
207 BitField<20, 3, AtomSize> size;
208 BitField<23, 3, AtomOp> op;
209 } const red{insn};
210 const IR::U64 offset{AtomOffset(*this, insn)};
211 GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
212}
213
214} // namespace Shader::Maxwell
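For reference, a minimal host-side sketch of how the ATOM/RED address built by AtomOffset above resolves: a 20-bit offset field at bit 28 that is sign-extended when a base register is used and treated as an absolute address when the base register is RZ, with the base truncated to 32 bits unless the E bit is set. AtomAddress, RZ and the sample encoding below are illustrative only and not part of the commit.

#include <cstdint>
#include <iostream>

constexpr uint32_t RZ = 255; // index of the always-zero register

uint64_t AtomAddress(uint64_t insn, uint64_t base_value, bool extended_64bit) {
    const uint32_t addr_reg = static_cast<uint32_t>((insn >> 8) & 0xff);
    const uint64_t raw_offset = (insn >> 28) & 0xfffff; // 20-bit offset field at bit 28
    if (addr_reg == RZ) {
        return raw_offset; // RZ base: the offset is an absolute, zero-extended address
    }
    // Sign-extend the 20-bit offset
    const int64_t offset = static_cast<int64_t>(raw_offset << 44) >> 44;
    // Without the E bit only the low 32 bits of the base register contribute
    const uint64_t base = extended_64bit ? base_value : (base_value & 0xffffffffULL);
    return base + static_cast<uint64_t>(offset);
}

int main() {
    // Base register R2 holds 0x1000 and the offset field encodes -16
    const uint64_t insn = (uint64_t{0xffff0} << 28) | (uint64_t{2} << 8);
    std::cout << std::hex << AtomAddress(insn, 0x1000, false) << '\n'; // prints ff0
}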
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21};
22
23enum class AtomsSize : u64 {
24 U32,
25 S32,
26 U64,
27};
28
29IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
30 bool is_signed) {
31 switch (op) {
32 case AtomOp::ADD:
33 return ir.SharedAtomicIAdd(offset, op_b);
34 case AtomOp::MIN:
35 return ir.SharedAtomicIMin(offset, op_b, is_signed);
36 case AtomOp::MAX:
37 return ir.SharedAtomicIMax(offset, op_b, is_signed);
38 case AtomOp::INC:
39 return ir.SharedAtomicInc(offset, op_b);
40 case AtomOp::DEC:
41 return ir.SharedAtomicDec(offset, op_b);
42 case AtomOp::AND:
43 return ir.SharedAtomicAnd(offset, op_b);
44 case AtomOp::OR:
45 return ir.SharedAtomicOr(offset, op_b);
46 case AtomOp::XOR:
47 return ir.SharedAtomicXor(offset, op_b);
48 case AtomOp::EXCH:
49 return ir.SharedAtomicExchange(offset, op_b);
50 default:
51 throw NotImplementedException("Integer Atoms Operation {}", op);
52 }
53}
54
55IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
56 union {
57 u64 raw;
58 BitField<8, 8, IR::Reg> offset_reg;
59 BitField<30, 22, u64> absolute_offset;
60 BitField<30, 22, s64> relative_offset;
61 } const encoding{insn};
62
63 if (encoding.offset_reg == IR::Reg::RZ) {
64 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
65 } else {
66 const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
67 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
68 }
69}
70
71void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
72 switch (size) {
73 case AtomsSize::U32:
74 case AtomsSize::S32:
75 return v.X(dest_reg, IR::U32{result});
76 case AtomsSize::U64:
77 return v.L(dest_reg, IR::U64{result});
78 default:
79 break;
80 }
81}
82} // Anonymous namespace
83
84void TranslatorVisitor::ATOMS(u64 insn) {
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<8, 8, IR::Reg> addr_reg;
89 BitField<20, 8, IR::Reg> src_reg_b;
90 BitField<28, 2, AtomsSize> size;
91 BitField<52, 4, AtomOp> op;
92 } const atoms{insn};
93
94 const bool size_64{atoms.size == AtomsSize::U64};
95 if (size_64 && atoms.op != AtomOp::EXCH) {
96 throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
97 }
98 const bool is_signed{atoms.size == AtomsSize::S32};
99 const IR::U32 offset{AtomsOffset(*this, insn)};
100
101 IR::Value result;
102 if (size_64) {
103 result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
104 } else {
105 result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
106 }
107 StoreResult(*this, atoms.dest_reg, result, atoms.size);
108}
109
110} // namespace Shader::Maxwell
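As a cross-check, a compile-time sketch of the ATOMS offset decode performed by AtomsOffset above: a 22-bit word offset at bit 30 that is scaled by 4, used as an absolute byte offset when the offset register is RZ and added to the register value otherwise. AtomsByteOffset and RZ are illustrative names, not part of the commit.

#include <cstdint>

constexpr uint32_t RZ = 255;

constexpr uint32_t AtomsByteOffset(uint64_t insn, uint32_t offset_reg_value) {
    const uint32_t offset_reg = static_cast<uint32_t>((insn >> 8) & 0xff);
    const uint64_t field = (insn >> 30) & 0x3fffff; // 22-bit offset field at bit 30
    if (offset_reg == RZ) {
        return static_cast<uint32_t>(field << 2); // absolute, word-aligned offset
    }
    const int64_t signed_field = static_cast<int64_t>(field << 42) >> 42; // sign-extend 22 bits
    return offset_reg_value + static_cast<uint32_t>(signed_field * 4);    // scale words to bytes
}

// An R0 base with an encoded offset of +3 words resolves to base + 12 bytes
static_assert(AtomsByteOffset((uint64_t{3} << 30) | (uint64_t{0} << 8), 0x100) == 0x10c, "");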
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12enum class BitSize : u64 {
13 B32,
14 B64,
15 B96,
16 B128,
17};
18
19void TranslatorVisitor::AL2P(u64 inst) {
20 union {
21 u64 raw;
22 BitField<0, 8, IR::Reg> result_register;
23 BitField<8, 8, IR::Reg> indexing_register;
24 BitField<20, 11, s64> offset;
25 BitField<47, 2, BitSize> bitsize;
26 } al2p{inst};
27 if (al2p.bitsize != BitSize::B32) {
28 throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
29 }
30 const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
31 const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
32 X(al2p.result_register, result);
33}
34
35} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13// Seems to be in CUDA terminology.
14enum class LocalScope : u64 {
15 CTA,
16 GL,
17 SYS,
18 VC,
19};
20} // Anonymous namespace
21
22void TranslatorVisitor::MEMBAR(u64 inst) {
23 union {
24 u64 raw;
25 BitField<8, 2, LocalScope> scope;
26 } const membar{inst};
27
28 if (membar.scope == LocalScope::CTA) {
29 ir.WorkgroupMemoryBarrier();
30 } else {
31 ir.DeviceMemoryBarrier();
32 }
33}
34
35void TranslatorVisitor::DEPBAR() {
36 // DEPBAR is a no-op
37}
38
39void TranslatorVisitor::BAR(u64 insn) {
40 enum class Mode {
41 RedPopc,
42 Scan,
43 RedAnd,
44 RedOr,
45 Sync,
46 Arrive,
47 };
48 union {
49 u64 raw;
50 BitField<43, 1, u64> is_a_imm;
51 BitField<44, 1, u64> is_b_imm;
52 BitField<8, 8, u64> imm_a;
53 BitField<20, 12, u64> imm_b;
54 BitField<42, 1, u64> neg_pred;
55 BitField<39, 3, IR::Pred> pred;
56 } const bar{insn};
57
58 const Mode mode{[insn] {
59 switch (insn & 0x0000009B00000000ULL) {
60 case 0x0000000200000000ULL:
61 return Mode::RedPopc;
62 case 0x0000000300000000ULL:
63 return Mode::Scan;
64 case 0x0000000A00000000ULL:
65 return Mode::RedAnd;
66 case 0x0000001200000000ULL:
67 return Mode::RedOr;
68 case 0x0000008000000000ULL:
69 return Mode::Sync;
70 case 0x0000008100000000ULL:
71 return Mode::Arrive;
72 }
73 throw NotImplementedException("Invalid encoding");
74 }()};
75 if (mode != Mode::Sync) {
76 throw NotImplementedException("BAR mode {}", mode);
77 }
78 if (bar.is_a_imm == 0) {
79 throw NotImplementedException("Non-immediate input A");
80 }
81 if (bar.imm_a != 0) {
82 throw NotImplementedException("Non-zero input A");
83 }
84 if (bar.is_b_imm == 0) {
85 throw NotImplementedException("Non-immediate input B");
86 }
87 if (bar.imm_b != 0) {
88 throw NotImplementedException("Non-zero input B");
89 }
90 if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
91 throw NotImplementedException("Non-true input predicate");
92 }
93 ir.Barrier();
94}
95
96} // namespace Shader::Maxwell
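For reference, a compile-time sketch of the BAR mode selection above: only the opcode bits covered by the mask 0x0000009B00000000 (bits 32, 33, 35, 36 and 39) take part in the decode, and every combination outside the listed cases is rejected by the translator. The enum and helper below are illustrative, not part of the commit.

#include <cstdint>

enum class BarMode { RedPopc, Scan, RedAnd, RedOr, Sync, Arrive, Invalid };

constexpr BarMode DecodeBarMode(uint64_t insn) {
    switch (insn & 0x0000009B00000000ULL) {
    case 0x0000000200000000ULL: return BarMode::RedPopc;
    case 0x0000000300000000ULL: return BarMode::Scan;
    case 0x0000000A00000000ULL: return BarMode::RedAnd;
    case 0x0000001200000000ULL: return BarMode::RedOr;
    case 0x0000008000000000ULL: return BarMode::Sync;
    case 0x0000008100000000ULL: return BarMode::Arrive;
    }
    return BarMode::Invalid; // any other combination throws in the translator
}

// BAR.SYNC sets only bit 39 of the masked field; adding bit 32 turns it into arrive
static_assert(DecodeBarMode(uint64_t{1} << 39) == BarMode::Sync, "");
static_assert(DecodeBarMode((uint64_t{1} << 39) | (uint64_t{1} << 32)) == BarMode::Arrive, "");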
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> offset_reg;
16 BitField<40, 1, u64> brev;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const bfe{insn};
20
21 const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
22 const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
23
24 // Common constants
25 const IR::U32 zero{v.ir.Imm32(0)};
26 const IR::U32 one{v.ir.Imm32(1)};
27 const IR::U32 max_size{v.ir.Imm32(32)};
28 // Edge case conditions
29 const IR::U1 zero_count{v.ir.IEqual(count, zero)};
30 const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
31 const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
32
33 IR::U32 base{v.X(bfe.offset_reg)};
34 if (bfe.brev != 0) {
35 base = v.ir.BitReverse(base);
36 }
37 IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
38 if (bfe.is_signed != 0) {
39 const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
40 const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
41 const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
42 // Replicate condition
43 result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
44 // Exceeding condition
45 const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
46 result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
47 }
48 // Zero count condition
49 result = IR::U32{v.ir.Select(zero_count, zero, result)};
50
51 v.X(bfe.dest_reg, result);
52
53 if (bfe.cc != 0) {
54 v.SetZFlag(v.ir.IEqual(result, zero));
55 v.SetSFlag(v.ir.ILessThan(result, zero, true));
56 v.ResetCFlag();
57 v.ResetOFlag();
58 }
59}
60} // Anonymous namespace
61
62void TranslatorVisitor::BFE_reg(u64 insn) {
63 BFE(*this, insn, GetReg20(insn));
64}
65
66void TranslatorVisitor::BFE_cbuf(u64 insn) {
67 BFE(*this, insn, GetCbuf(insn));
68}
69
70void TranslatorVisitor::BFE_imm(u64 insn) {
71 BFE(*this, insn, GetImm20(insn));
72}
73
74} // namespace Shader::Maxwell
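A rough host-side reference of the semantics the BFE translation above tries to reproduce, covering the three edge cases it handles explicitly: a zero count yields zero, a signed extract whose offset is 32 or more replicates the sign bit of the base, and a signed field that runs past bit 31 copies the base's sign bit into bit 31 of the result. ReferenceBFE and BitReverse32 are illustrative helpers written against those assumptions, not part of the commit.

#include <cstdint>

// Reverse the bits of a 32-bit word (the .BREV modifier)
constexpr uint32_t BitReverse32(uint32_t v) {
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i) {
        r = (r << 1) | ((v >> i) & 1u);
    }
    return r;
}

constexpr uint32_t ReferenceBFE(uint32_t base, uint32_t src, bool is_signed, bool brev) {
    const uint32_t offset = src & 0xff;       // bits 0..7 of the second source
    const uint32_t count = (src >> 8) & 0xff; // bits 8..15 of the second source
    if (brev) {
        base = BitReverse32(base);
    }
    if (count == 0) {
        return 0; // zero-count extraction is defined to produce zero
    }
    if (is_signed && offset >= 32) {
        // Whole field lies outside the word: replicate the sign bit of the base
        return (base & 0x80000000u) != 0 ? 0xffffffffu : 0u;
    }
    const uint32_t width = count < 32 ? count : 32;
    uint32_t field = offset < 32 ? (base >> offset) : 0u;
    if (width < 32) {
        field &= (1u << width) - 1u;
        if (is_signed && (field & (1u << (width - 1))) != 0) {
            field |= ~((1u << width) - 1u); // sign-extend the extracted field
        }
    }
    if (is_signed && offset + count >= 32) {
        // Fields running past bit 31 take the base's sign bit as their top bit
        field = (field & 0x7fffffffu) | (base & 0x80000000u);
    }
    return field;
}

// Extracting a signed 4-bit field of -1 starting at bit 4 stays -1
static_assert(ReferenceBFE(0xffffffffu, (4u << 8) | 4u, true, false) == 0xffffffffu, "");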
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> insert_reg;
16 BitField<47, 1, u64> cc;
17 } const bfi{insn};
18
19 const IR::U32 zero{v.ir.Imm32(0)};
20 const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
21 const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
22 const IR::U32 max_size{v.ir.Imm32(32)};
23
24 // Edge case conditions
25 const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
26 const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
27
28 const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
29 const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
30
31 const IR::U32 insert{v.X(bfi.insert_reg)};
32 IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
33
34 result = IR::U32{v.ir.Select(exceed_offset, base, result)};
35
36 v.X(bfi.dest_reg, result);
37 if (bfi.cc != 0) {
38 v.SetZFlag(v.ir.IEqual(result, zero));
39 v.SetSFlag(v.ir.ILessThan(result, zero, true));
40 v.ResetCFlag();
41 v.ResetOFlag();
42 }
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::BFI_reg(u64 insn) {
47 BFI(*this, insn, GetReg20(insn), GetReg39(insn));
48}
49
50void TranslatorVisitor::BFI_rc(u64 insn) {
51 BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
52}
53
54void TranslatorVisitor::BFI_cr(u64 insn) {
55 BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
56}
57
58void TranslatorVisitor::BFI_imm(u64 insn) {
59 BFI(*this, insn, GetImm20(insn), GetReg39(insn));
60}
61
62} // namespace Shader::Maxwell
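Similarly, a compact reference of the clamping done by the BFI translation above: the low byte of the first source gives the insertion offset and the next byte the width, an offset of 32 or more leaves the base unchanged, and a width larger than 32 is clamped to the bits remaining above the offset. ReferenceBFI is an illustrative helper, not part of the commit.

#include <cstdint>

constexpr uint32_t ReferenceBFI(uint32_t packed_offset_count, uint32_t insert, uint32_t base) {
    const uint32_t offset = packed_offset_count & 0xff; // insertion position
    uint32_t count = (packed_offset_count >> 8) & 0xff; // field width
    if (offset >= 32) {
        return base; // insertion entirely outside the word leaves the base untouched
    }
    if (count > 32) {
        count = 32 - offset; // oversized widths keep only the remaining bits
    }
    if (count == 0) {
        return base;
    }
    const uint32_t mask = count >= 32 ? 0xffffffffu : ((1u << count) - 1u);
    return (base & ~(mask << offset)) | ((insert & mask) << offset);
}

// Insert the low 8 bits of 0xAB at bit 16 of an all-zero base
static_assert(ReferenceBFI((8u << 8) | 16u, 0xab, 0) == 0x00ab0000u, "");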
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void Check(u64 insn) {
13 union {
14 u64 raw;
15 BitField<5, 1, u64> cbuf_mode;
16 BitField<6, 1, u64> lmt;
17 } const encoding{insn};
18
19 if (encoding.cbuf_mode != 0) {
20 throw NotImplementedException("Constant buffer mode");
21 }
22 if (encoding.lmt != 0) {
23 throw NotImplementedException("LMT");
24 }
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::BRX(u64 insn) {
29 Check(insn);
30}
31
32void TranslatorVisitor::JMX(u64 insn) {
33 Check(insn);
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
11
12namespace Shader::Maxwell {
13
14enum class FpRounding : u64 {
15 RN,
16 RM,
17 RP,
18 RZ,
19};
20
21enum class FmzMode : u64 {
22 None,
23 FTZ,
24 FMZ,
25 INVALIDFMZ3,
26};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 // FMZ is manually handled in the instruction
50 return IR::FmzMode::FTZ;
51 case FmzMode::INVALIDFMZ3:
52 break;
53 }
54 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
55}
56
57} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
6
7namespace Shader::Maxwell {
8IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
9 CompareOp compare_op, bool is_signed) {
10 switch (compare_op) {
11 case CompareOp::False:
12 return ir.Imm1(false);
13 case CompareOp::LessThan:
14 return ir.ILessThan(operand_1, operand_2, is_signed);
15 case CompareOp::Equal:
16 return ir.IEqual(operand_1, operand_2);
17 case CompareOp::LessThanEqual:
18 return ir.ILessThanEqual(operand_1, operand_2, is_signed);
19 case CompareOp::GreaterThan:
20 return ir.IGreaterThan(operand_1, operand_2, is_signed);
21 case CompareOp::NotEqual:
22 return ir.INotEqual(operand_1, operand_2);
23 case CompareOp::GreaterThanEqual:
24 return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
25 case CompareOp::True:
26 return ir.Imm1(true);
27 default:
28 throw NotImplementedException("Invalid compare op {}", compare_op);
29 }
30}
31
32IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
33 CompareOp compare_op, bool is_signed) {
34 const IR::U32 zero{ir.Imm32(0)};
35 const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
36 const IR::U1 z_flag{ir.GetZFlag()};
37 const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
38 const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
39 : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
40 ir.ILessThan(operand_2, zero, true))};
41 switch (compare_op) {
42 case CompareOp::False:
43 return ir.Imm1(false);
44 case CompareOp::LessThan:
45 return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
46 ir.ILessThan(intermediate, zero, true))};
47 case CompareOp::Equal:
48 return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
49 case CompareOp::LessThanEqual: {
50 const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
51 ir.ILessThan(intermediate, zero, true))};
52 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
53 }
54 case CompareOp::GreaterThan: {
55 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
56 ir.IGreaterThan(intermediate, zero, true))};
57 const IR::U1 not_z{ir.LogicalNot(z_flag)};
58 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
59 }
60 case CompareOp::NotEqual:
61 return ir.LogicalOr(ir.INotEqual(intermediate, zero),
62 ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
63 case CompareOp::GreaterThanEqual: {
64 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
65 ir.IGreaterThanEqual(intermediate, zero, true))};
66 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
67 }
68 case CompareOp::True:
69 return ir.Imm1(true);
70 default:
71 throw NotImplementedException("Invalid compare op {}", compare_op);
72 }
73}
74
75IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
76 BooleanOp bop) {
77 switch (bop) {
78 case BooleanOp::AND:
79 return ir.LogicalAnd(predicate_1, predicate_2);
80 case BooleanOp::OR:
81 return ir.LogicalOr(predicate_1, predicate_2);
82 case BooleanOp::XOR:
83 return ir.LogicalXor(predicate_1, predicate_2);
84 default:
85 throw NotImplementedException("Invalid bop {}", bop);
86 }
87}
88
89IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
90 switch (op) {
91 case PredicateOp::False:
92 return ir.Imm1(false);
93 case PredicateOp::True:
94 return ir.Imm1(true);
95 case PredicateOp::Zero:
96 return ir.IEqual(result, ir.Imm32(0));
97 case PredicateOp::NonZero:
98 return ir.INotEqual(result, ir.Imm32(0));
99 default:
100 throw NotImplementedException("Invalid Predicate operation {}", op);
101 }
102}
103
104bool IsCompareOpOrdered(FPCompareOp op) {
105 switch (op) {
106 case FPCompareOp::LTU:
107 case FPCompareOp::EQU:
108 case FPCompareOp::LEU:
109 case FPCompareOp::GTU:
110 case FPCompareOp::NEU:
111 case FPCompareOp::GEU:
112 return false;
113 default:
114 return true;
115 }
116}
117
118IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
119 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
120 IR::FpControl control) {
121 const bool ordered{IsCompareOpOrdered(compare_op)};
122 switch (compare_op) {
123 case FPCompareOp::F:
124 return ir.Imm1(false);
125 case FPCompareOp::LT:
126 case FPCompareOp::LTU:
127 return ir.FPLessThan(operand_1, operand_2, control, ordered);
128 case FPCompareOp::EQ:
129 case FPCompareOp::EQU:
130 return ir.FPEqual(operand_1, operand_2, control, ordered);
131 case FPCompareOp::LE:
132 case FPCompareOp::LEU:
133 return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
134 case FPCompareOp::GT:
135 case FPCompareOp::GTU:
136 return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
137 case FPCompareOp::NE:
138 case FPCompareOp::NEU:
139 return ir.FPNotEqual(operand_1, operand_2, control, ordered);
140 case FPCompareOp::GE:
141 case FPCompareOp::GEU:
142 return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
143 case FPCompareOp::NUM:
144 return ir.FPOrdered(operand_1, operand_2);
145 case FPCompareOp::Nan:
146 return ir.FPUnordered(operand_1, operand_2);
147 case FPCompareOp::T:
148 return ir.Imm1(true);
149 default:
150 throw NotImplementedException("Invalid FP compare op {}", compare_op);
151 }
152}
153} // namespace Shader::Maxwell
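A small scalar model of the ordered/unordered split used by IsCompareOpOrdered and FloatingPointCompare above: the plain compare ops are false whenever a NaN is involved, while their *U counterparts succeed in that case, and NUM/NAN test for the absence or presence of a NaN. FloatLessThan below is an illustrative stand-in for the LT/LTU pair, not part of the commit.

#include <cassert>
#include <cmath>
#include <limits>

// Ordered compares fail on NaN operands; unordered compares succeed on them
bool FloatLessThan(float a, float b, bool ordered) {
    if (std::isnan(a) || std::isnan(b)) {
        return !ordered; // LT is false here, LTU is true
    }
    return a < b;
}

int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    assert(!FloatLessThan(nan, 1.0f, true));  // LT with a NaN operand
    assert(FloatLessThan(nan, 1.0f, false));  // LTU with a NaN operand
    assert(FloatLessThan(0.0f, 1.0f, true));  // ordinary ordered compare
    return 0;
}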
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
12 const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
13
14[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
15 const IR::U32& operand_2, CompareOp compare_op,
16 bool is_signed);
17
18[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
19 const IR::U1& predicate_2, BooleanOp bop);
20
21[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
22
23[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
24
25[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
26 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
27 IR::FpControl control = {});
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12void TranslatorVisitor::CSET(u64 insn) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 5, IR::FlowTest> cc_test;
17 BitField<39, 3, IR::Pred> bop_pred;
18 BitField<42, 1, u64> neg_bop_pred;
19 BitField<44, 1, u64> bf;
20 BitField<45, 2, BooleanOp> bop;
21 BitField<47, 1, u64> cc;
22 } const cset{insn};
23
24 const IR::U32 one_mask{ir.Imm32(-1)};
25 const IR::U32 fp_one{ir.Imm32(0x3f800000)};
26 const IR::U32 zero{ir.Imm32(0)};
27 const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
28 const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
29 const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
30 const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
31 const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
32 X(cset.dest_reg, result);
33 if (cset.cc != 0) {
34 const IR::U1 is_zero{ir.IEqual(result, zero)};
35 SetZFlag(is_zero);
36 if (cset.bf != 0) {
37 ResetSFlag();
38 } else {
39 SetSFlag(ir.LogicalNot(is_zero));
40 }
41 ResetOFlag();
42 ResetCFlag();
43 }
44}
45
46void TranslatorVisitor::CSETP(u64 insn) {
47 union {
48 u64 raw;
49 BitField<0, 3, IR::Pred> dest_pred_b;
50 BitField<3, 3, IR::Pred> dest_pred_a;
51 BitField<8, 5, IR::FlowTest> cc_test;
52 BitField<39, 3, IR::Pred> bop_pred;
53 BitField<42, 1, u64> neg_bop_pred;
54 BitField<45, 2, BooleanOp> bop;
55 } const csetp{insn};
56
57 const BooleanOp bop{csetp.bop};
58 const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
59 const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
60 const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
61 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
62 ir.SetPred(csetp.dest_pred_a, result_a);
63 ir.SetPred(csetp.dest_pred_b, result_b);
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<45, 1, u64> neg_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> neg_a;
23 BitField<49, 1, u64> abs_b;
24 } const dadd{insn};
25 if (dadd.cc != 0) {
26 throw NotImplementedException("DADD CC");
27 }
28
29 const IR::F64 src_a{v.D(dadd.src_a_reg)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
32
33 const IR::FpControl control{
34 .no_contraction = true,
35 .rounding = CastFpRounding(dadd.fp_rounding),
36 .fmz_mode = IR::FmzMode::None,
37 };
38
39 v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DADD_reg(u64 insn) {
44 DADD(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DADD_cbuf(u64 insn) {
48 DADD(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DADD_imm(u64 insn) {
52 DADD(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 } const dset{insn};
28
29 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
30 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
31
32 IR::U1 pred{v.ir.GetPred(dset.pred)};
33 if (dset.neg_pred != 0) {
34 pred = v.ir.LogicalNot(pred);
35 }
36 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
37 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
38
39 const IR::U32 one_mask{v.ir.Imm32(-1)};
40 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
41 const IR::U32 zero{v.ir.Imm32(0)};
42 const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
43 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
44
45 v.X(dset.dest_reg, result);
46 if (dset.cc != 0) {
47 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
48 v.SetZFlag(is_zero);
49 if (dset.bf != 0) {
50 v.ResetSFlag();
51 } else {
52 v.SetSFlag(v.ir.LogicalNot(is_zero));
53 }
54 v.ResetCFlag();
55 v.ResetOFlag();
56 }
57}
58} // Anonymous namespace
59
60void TranslatorVisitor::DSET_reg(u64 insn) {
61 DSET(*this, insn, GetDoubleReg20(insn));
62}
63
64void TranslatorVisitor::DSET_cbuf(u64 insn) {
65 DSET(*this, insn, GetDoubleCbuf(insn));
66}
67
68void TranslatorVisitor::DSET_imm(u64 insn) {
69 DSET(*this, insn, GetDoubleImm20(insn));
70}
71
72} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<50, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg_b;
21 BitField<49, 1, u64> neg_c;
22 } const dfma{insn};
23
24 if (dfma.cc != 0) {
25 throw NotImplementedException("DFMA CC");
26 }
27
28 const IR::F64 src_a{v.D(dfma.src_a_reg)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
30 const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
31
32 const IR::FpControl control{
33 .no_contraction = true,
34 .rounding = CastFpRounding(dfma.fp_rounding),
35 .fmz_mode = IR::FmzMode::None,
36 };
37
38 v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DFMA_reg(u64 insn) {
43 DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
44}
45
46void TranslatorVisitor::DFMA_cr(u64 insn) {
47 DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
48}
49
50void TranslatorVisitor::DFMA_rc(u64 insn) {
51 DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
52}
53
54void TranslatorVisitor::DFMA_imm(u64 insn) {
55 DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<45, 1, u64> negate_b;
19 BitField<46, 1, u64> abs_a;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> negate_a;
22 BitField<49, 1, u64> abs_b;
23 } const dmnmx{insn};
24
25 if (dmnmx.cc != 0) {
26 throw NotImplementedException("DMNMX CC");
27 }
28
29 const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
32
33 IR::F64 max{v.ir.FPMax(op_a, op_b)};
34 IR::F64 min{v.ir.FPMin(op_a, op_b)};
35
36 if (dmnmx.neg_pred != 0) {
37 std::swap(min, max);
38 }
39 v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DMNMX_reg(u64 insn) {
44 DMNMX(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
48 DMNMX(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DMNMX_imm(u64 insn) {
52 DMNMX(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg;
21 } const dmul{insn};
22
23 if (dmul.cc != 0) {
24 throw NotImplementedException("DMUL CC");
25 }
26
27 const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
28 const IR::FpControl control{
29 .no_contraction = true,
30 .rounding = CastFpRounding(dmul.fp_rounding),
31 .fmz_mode = IR::FmzMode::None,
32 };
33
34 v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
35}
36} // Anonymous namespace
37
38void TranslatorVisitor::DMUL_reg(u64 insn) {
39 DMUL(*this, insn, GetDoubleReg20(insn));
40}
41
42void TranslatorVisitor::DMUL_cbuf(u64 insn) {
43 DMUL(*this, insn, GetDoubleCbuf(insn));
44}
45
46void TranslatorVisitor::DMUL_imm(u64 insn) {
47 DMUL(*this, insn, GetDoubleImm20(insn));
48}
49
50} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<48, 4, FPCompareOp> compare_op;
26 } const dsetp{insn};
27
28 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
30
31 const BooleanOp bop{dsetp.bop};
32 const FPCompareOp compare_op{dsetp.compare_op};
33 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
34 const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
35 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
36 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
37 v.ir.SetPred(dsetp.dest_pred_a, result_a);
38 v.ir.SetPred(dsetp.dest_pred_b, result_b);
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DSETP_reg(u64 insn) {
43 DSETP(*this, insn, GetDoubleReg20(insn));
44}
45
46void TranslatorVisitor::DSETP_cbuf(u64 insn) {
47 DSETP(*this, insn, GetDoubleCbuf(insn));
48}
49
50void TranslatorVisitor::DSETP_imm(u64 insn) {
51 DSETP(*this, insn, GetDoubleImm20(insn));
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ExitFragment(TranslatorVisitor& v) {
12 const ProgramHeader sph{v.env.SPH()};
13 IR::Reg src_reg{IR::Reg::R0};
14 for (u32 render_target = 0; render_target < 8; ++render_target) {
15 const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
16 for (u32 component = 0; component < 4; ++component) {
17 if (!mask[component]) {
18 continue;
19 }
20 v.ir.SetFragColor(render_target, component, v.F(src_reg));
21 ++src_reg;
22 }
23 }
24 if (sph.ps.omap.sample_mask != 0) {
25 v.ir.SetSampleMask(v.X(src_reg));
26 }
27 if (sph.ps.omap.depth != 0) {
28 v.ir.SetFragDepth(v.F(src_reg + 1));
29 }
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::EXIT() {
34 switch (env.ShaderStage()) {
35 case Stage::Fragment:
36 ExitFragment(*this);
37 break;
38 default:
39 break;
40 }
41}
42
43} // namespace Shader::Maxwell
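To illustrate the register walk in ExitFragment above: enabled color components are read from consecutive registers starting at R0, the sample mask (when enabled) comes from the next register, and the depth output from the register after that. The per-target enable masks below are hypothetical sample values, not taken from the commit.

#include <array>
#include <iostream>

int main() {
    // Hypothetical component enable masks for the eight render targets (x, y, z, w)
    const std::array<std::array<bool, 4>, 8> enabled{{
        {true, true, true, true},   // RT0 writes RGBA
        {true, false, false, true}, // RT1 writes only R and A; the rest are disabled
    }};
    int reg = 0; // next register to consume, starting at R0
    for (int rt = 0; rt < 8; ++rt) {
        for (int comp = 0; comp < 4; ++comp) {
            if (!enabled[rt][comp]) {
                continue;
            }
            std::cout << "RT" << rt << '.' << "xyzw"[comp] << " <- R" << reg++ << '\n';
        }
    }
    std::cout << "sample mask <- R" << reg << ", depth <- R" << reg + 1 << '\n';
    return 0;
}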
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 BitField<41, 1, u64> shift;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const flo{insn};
20
21 if (flo.cc != 0) {
22 throw NotImplementedException("CC");
23 }
24 if (flo.tilde != 0) {
25 src = v.ir.BitwiseNot(src);
26 }
27 IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
28 if (flo.shift != 0) {
29 const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
30 result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
31 }
32 v.X(flo.dest_reg, result);
33}
34} // Anonymous namespace
35
36void TranslatorVisitor::FLO_reg(u64 insn) {
37 FLO(*this, insn, GetReg20(insn));
38}
39
40void TranslatorVisitor::FLO_cbuf(u64 insn) {
41 FLO(*this, insn, GetCbuf(insn));
42}
43
44void TranslatorVisitor::FLO_imm(u64 insn) {
45 FLO(*this, insn, GetImm20(insn));
46}
47} // namespace Shader::Maxwell
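For reference, a host-side model of the result FLO computes above: FindUMsb returns the index of the highest set bit (or -1 for zero), FindSMsb the index of the highest bit differing from the sign bit, and the shift modifier XORs a found index with 31 so the value becomes a leading-zero or leading-sign-bit count while -1 is passed through. The helpers below are illustrative, not part of the commit.

#include <cstdint>

constexpr int32_t FindUMsb(uint32_t v) {
    for (int32_t bit = 31; bit >= 0; --bit) {
        if (((v >> bit) & 1u) != 0) {
            return bit;
        }
    }
    return -1; // no bit set
}

constexpr int32_t FindSMsb(int32_t v) {
    // For negative inputs look for the highest clear bit instead
    return FindUMsb(v < 0 ? ~static_cast<uint32_t>(v) : static_cast<uint32_t>(v));
}

constexpr int32_t ReferenceFLO(uint32_t src, bool is_signed, bool invert, bool shift) {
    const uint32_t value = invert ? ~src : src;
    const int32_t found = is_signed ? FindSMsb(static_cast<int32_t>(value)) : FindUMsb(value);
    if (shift && found != -1) {
        return found ^ 31; // turn the bit index into a leading-bit count
    }
    return found;
}

static_assert(ReferenceFLO(0x00010000u, false, false, false) == 16, "");
static_assert(ReferenceFLO(0x00010000u, false, false, true) == 15, ""); // 15 leading zeros
static_assert(ReferenceFLO(0u, false, false, true) == -1, "");          // not-found stays -1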
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
13 const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const fadd{insn};
19
20 if (cc) {
21 throw NotImplementedException("FADD CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
25 IR::FpControl control{
26 .no_contraction = true,
27 .rounding = CastFpRounding(fp_rounding),
28 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
29 };
30 IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
31 if (sat) {
32 value = v.ir.FPSaturate(value);
33 }
34 v.F(fadd.dest_reg, value);
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetFloatReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64 insn) {
60 FADD(*this, insn, GetFloatCbuf(insn));
61}
62
63void TranslatorVisitor::FADD_imm(u64 insn) {
64 FADD(*this, insn, GetFloatImm20(insn));
65}
66
67void TranslatorVisitor::FADD32I(u64 insn) {
68 union {
69 u64 raw;
70 BitField<55, 1, u64> ftz;
71 BitField<56, 1, u64> neg_a;
72 BitField<54, 1, u64> abs_a;
73 BitField<52, 1, u64> cc;
74 BitField<53, 1, u64> neg_b;
75 BitField<57, 1, u64> abs_b;
76 } const fadd32i{insn};
77
78 FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
79 fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<47, 1, u64> ftz;
18 BitField<48, 4, FPCompareOp> compare_op;
19 } const fcmp{insn};
20
21 const IR::F32 zero{v.ir.Imm32(0.0f)};
22 const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
23 const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
24 const IR::U32 src_reg{v.X(fcmp.src_reg)};
25 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
26
27 v.X(fcmp.dest_reg, result);
28}
29} // Anonymous namespace
30
31void TranslatorVisitor::FCMP_reg(u64 insn) {
32 FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
33}
34
35void TranslatorVisitor::FCMP_rc(u64 insn) {
36 FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
37}
38
39void TranslatorVisitor::FCMP_cr(u64 insn) {
40 FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
41}
42
43void TranslatorVisitor::FCMP_imm(u64 insn) {
44 union {
45 u64 raw;
46 BitField<20, 19, u64> value;
47 BitField<56, 1, u64> is_negative;
48 } const fcmp{insn};
49 const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
50 const u32 value{static_cast<u32>(fcmp.value) << 12};
51
52 FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
53}
54
55} // namespace Shader::Maxwell
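As a worked example of the FCMP_imm operand decode above: the 19-bit field at bit 20 supplies the upper bits of a binary32 value (shifted left by 12) and bit 56 supplies the sign, so only immediates whose low 12 mantissa bits are zero can be encoded. DecodeFcmpImm is an illustrative helper, not part of the commit.

#include <cstdint>

constexpr uint32_t DecodeFcmpImm(uint64_t insn) {
    const uint32_t value = static_cast<uint32_t>((insn >> 20) & 0x7ffff) << 12; // 19-bit field
    const uint32_t sign = ((insn >> 56) & 1) != 0 ? 0x80000000u : 0u;           // bit 56
    return value | sign;
}

// 0x3f800 << 12 is 0x3f800000, the binary32 encoding of 1.0f
static_assert(DecodeFcmpImm(uint64_t{0x3f800} << 20) == 0x3f800000u, "");
// Setting bit 56 flips the sign, giving -1.0f
static_assert(DecodeFcmpImm((uint64_t{0x3f800} << 20) | (uint64_t{1} << 56)) == 0xbf800000u, "");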
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 BitField<55, 1, u64> ftz;
28 } const fset{insn};
29
30 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
31    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)};
32 const IR::FpControl control{
33 .no_contraction = false,
34 .rounding = IR::FpRounding::DontCare,
35 .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
36 };
37
38 IR::U1 pred{v.ir.GetPred(fset.pred)};
39 if (fset.neg_pred != 0) {
40 pred = v.ir.LogicalNot(pred);
41 }
42 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 zero{v.ir.Imm32(0)};
48 const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
49 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
50
51 v.X(fset.dest_reg, result);
52 if (fset.cc != 0) {
53 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
54 v.SetZFlag(is_zero);
55 if (fset.bf != 0) {
56 v.ResetSFlag();
57 } else {
58 v.SetSFlag(v.ir.LogicalNot(is_zero));
59 }
60 v.ResetCFlag();
61 v.ResetOFlag();
62 }
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::FSET_reg(u64 insn) {
67 FSET(*this, insn, GetFloatReg20(insn));
68}
69
70void TranslatorVisitor::FSET_cbuf(u64 insn) {
71 FSET(*this, insn, GetFloatCbuf(insn));
72}
73
74void TranslatorVisitor::FSET_imm(u64 insn) {
75 FSET(*this, insn, GetFloatImm20(insn));
76}
77
78} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
7
8namespace Shader::Maxwell {
9namespace {
10enum class FloatFormat : u64 {
11 F16 = 1,
12 F32 = 2,
13 F64 = 3,
14};
15
16enum class RoundingOp : u64 {
17 None = 0,
18 Pass = 3,
19 Round = 8,
20 Floor = 9,
21 Ceil = 10,
22 Trunc = 11,
23};
24
25[[nodiscard]] u32 WidthSize(FloatFormat width) {
26 switch (width) {
27 case FloatFormat::F16:
28 return 16;
29 case FloatFormat::F32:
30 return 32;
31 case FloatFormat::F64:
32 return 64;
33 default:
34 throw NotImplementedException("Invalid width {}", width);
35 }
36}
37
38void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
39 union {
40 u64 insn;
41 BitField<0, 8, IR::Reg> dest_reg;
42 BitField<44, 1, u64> ftz;
43 BitField<45, 1, u64> neg;
44 BitField<47, 1, u64> cc;
45 BitField<50, 1, u64> sat;
46 BitField<39, 4, u64> rounding_op;
47 BitField<39, 2, FpRounding> rounding;
48 BitField<10, 2, FloatFormat> src_size;
49 BitField<8, 2, FloatFormat> dst_size;
50
51 [[nodiscard]] RoundingOp RoundingOperation() const {
52 constexpr u64 rounding_mask = 0x0B;
53 return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
54 }
55 } const f2f{insn};
56
57 if (f2f.cc != 0) {
58 throw NotImplementedException("F2F CC");
59 }
60
61 IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
62
63 const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
64 IR::FpControl fp_control{
65 .no_contraction = false,
66 .rounding = IR::FpRounding::DontCare,
67 .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
68 };
69 if (f2f.src_size != f2f.dst_size) {
70 fp_control.rounding = CastFpRounding(f2f.rounding);
71 input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
72 } else {
73 switch (f2f.RoundingOperation()) {
74 case RoundingOp::None:
75 case RoundingOp::Pass:
76            // Make sure NaNs are handled properly
77 switch (f2f.src_size) {
78 case FloatFormat::F16:
79 input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
80 break;
81 case FloatFormat::F32:
82 input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
83 break;
84 case FloatFormat::F64:
85 input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
86 break;
87 }
88 break;
89 case RoundingOp::Round:
90 input = v.ir.FPRoundEven(input, fp_control);
91 break;
92 case RoundingOp::Floor:
93 input = v.ir.FPFloor(input, fp_control);
94 break;
95 case RoundingOp::Ceil:
96 input = v.ir.FPCeil(input, fp_control);
97 break;
98 case RoundingOp::Trunc:
99 input = v.ir.FPTrunc(input, fp_control);
100 break;
101 default:
102 throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
103 }
104 }
105 if (f2f.sat != 0 && !any_fp64) {
106 input = v.ir.FPSaturate(input);
107 }
108
109 switch (f2f.dst_size) {
110 case FloatFormat::F16: {
111 const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
112 v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
113 break;
114 }
115 case FloatFormat::F32:
116 v.F(f2f.dest_reg, input);
117 break;
118 case FloatFormat::F64:
119 v.D(f2f.dest_reg, input);
120 break;
121 default:
122 throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
123 }
124}
125} // Anonymous namespace
126
127void TranslatorVisitor::F2F_reg(u64 insn) {
128 union {
129 u64 insn;
130 BitField<49, 1, u64> abs;
131 BitField<10, 2, FloatFormat> src_size;
132 BitField<41, 1, u64> selector;
133 } const f2f{insn};
134
135 IR::F16F32F64 src_a;
136 switch (f2f.src_size) {
137 case FloatFormat::F16: {
138 auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
139 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
140 break;
141 }
142 case FloatFormat::F32:
143 src_a = GetFloatReg20(insn);
144 break;
145 case FloatFormat::F64:
146 src_a = GetDoubleReg20(insn);
147 break;
148 default:
149        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
150 }
151 F2F(*this, insn, src_a, f2f.abs != 0);
152}
153
154void TranslatorVisitor::F2F_cbuf(u64 insn) {
155 union {
156 u64 insn;
157 BitField<49, 1, u64> abs;
158 BitField<10, 2, FloatFormat> src_size;
159 BitField<41, 1, u64> selector;
160 } const f2f{insn};
161
162 IR::F16F32F64 src_a;
163 switch (f2f.src_size) {
164 case FloatFormat::F16: {
165 auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
166 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
167 break;
168 }
169 case FloatFormat::F32:
170 src_a = GetFloatCbuf(insn);
171 break;
172 case FloatFormat::F64:
173 src_a = GetDoubleCbuf(insn);
174 break;
175 default:
176        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
177 }
178 F2F(*this, insn, src_a, f2f.abs != 0);
179}
180
181void TranslatorVisitor::F2F_imm(u64 insn) {
182 union {
183 u64 insn;
184 BitField<49, 1, u64> abs;
185 BitField<10, 2, FloatFormat> src_size;
186 BitField<41, 1, u64> selector;
187 BitField<20, 19, u64> imm;
188 BitField<56, 1, u64> imm_neg;
189 } const f2f{insn};
190
191 IR::F16F32F64 src_a;
192 switch (f2f.src_size) {
193 case FloatFormat::F16: {
194 const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
195 const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
196 src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
197 if (f2f.imm_neg != 0) {
198 throw NotImplementedException("Neg bit on F16");
199 }
200 break;
201 }
202 case FloatFormat::F32:
203 src_a = GetFloatImm20(insn);
204 break;
205 case FloatFormat::F64:
206 src_a = GetDoubleImm20(insn);
207 break;
208 default:
209        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
210 }
211 F2F(*this, insn, src_a, f2f.abs != 0);
212}
213
214} // namespace Shader::Maxwell
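For reference, a compile-time sketch of how the F2F rounding field above is interpreted: the 4-bit value at bit 39 is masked with 0x0B and treated as an integer-rounding operation when source and destination sizes match, while only its low two bits select the IEEE rounding mode for an actual size conversion. The enums and helpers below are illustrative, not part of the commit.

#include <cstdint>

enum class F2fRoundingOp { None = 0, Pass = 3, Round = 8, Floor = 9, Ceil = 10, Trunc = 11 };
enum class F2fFpRounding { RN = 0, RM = 1, RP = 2, RZ = 3 };

constexpr F2fRoundingOp RoundingOperation(uint64_t insn) {
    const uint64_t raw = (insn >> 39) & 0xf; // 4-bit rounding field at bit 39
    return static_cast<F2fRoundingOp>(raw & 0x0b);
}

constexpr F2fFpRounding ConversionRounding(uint64_t insn) {
    return static_cast<F2fFpRounding>((insn >> 39) & 0x3); // low two bits only
}

// A field value of 9 means FLOOR for same-size conversions,
// but round-towards-minus-infinity (RM) when the sizes differ
static_assert(RoundingOperation(uint64_t{9} << 39) == F2fRoundingOp::Floor, "");
static_assert(ConversionRounding(uint64_t{9} << 39) == F2fFpRounding::RM, "");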
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class DestFormat : u64 {
15 Invalid,
16 I16,
17 I32,
18 I64,
19};
20enum class SrcFormat : u64 {
21 Invalid,
22 F16,
23 F32,
24 F64,
25};
26enum class Rounding : u64 {
27 Round,
28 Floor,
29 Ceil,
30 Trunc,
31};
32
33union F2I {
34 u64 raw;
35 BitField<0, 8, IR::Reg> dest_reg;
36 BitField<8, 2, DestFormat> dest_format;
37 BitField<10, 2, SrcFormat> src_format;
38 BitField<12, 1, u64> is_signed;
39 BitField<39, 2, Rounding> rounding;
40 BitField<41, 1, u64> half;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> abs;
43 BitField<47, 1, u64> cc;
44 BitField<49, 1, u64> neg;
45};
46
47size_t BitSize(DestFormat dest_format) {
48 switch (dest_format) {
49 case DestFormat::I16:
50 return 16;
51 case DestFormat::I32:
52 return 32;
53 case DestFormat::I64:
54 return 64;
55 default:
56 throw NotImplementedException("Invalid destination format {}", dest_format);
57 }
58}
59
60std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
61 if (is_signed) {
62 switch (format) {
63 case DestFormat::I16:
64 return {static_cast<f64>(std::numeric_limits<s16>::max()),
65 static_cast<f64>(std::numeric_limits<s16>::min())};
66 case DestFormat::I32:
67 return {static_cast<f64>(std::numeric_limits<s32>::max()),
68 static_cast<f64>(std::numeric_limits<s32>::min())};
69 case DestFormat::I64:
70 return {static_cast<f64>(std::numeric_limits<s64>::max()),
71 static_cast<f64>(std::numeric_limits<s64>::min())};
72 default:
73 break;
74 }
75 } else {
76 switch (format) {
77 case DestFormat::I16:
78 return {static_cast<f64>(std::numeric_limits<u16>::max()),
79 static_cast<f64>(std::numeric_limits<u16>::min())};
80 case DestFormat::I32:
81 return {static_cast<f64>(std::numeric_limits<u32>::max()),
82 static_cast<f64>(std::numeric_limits<u32>::min())};
83 case DestFormat::I64:
84 return {static_cast<f64>(std::numeric_limits<u64>::max()),
85 static_cast<f64>(std::numeric_limits<u64>::min())};
86 default:
87 break;
88 }
89 }
90 throw NotImplementedException("Invalid destination format {}", format);
91}
92
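// Reads the F64 operand of F2I from a constant buffer: only the upper word
// (at offset * 4 + 4) is fetched and the lower 32 bits are zeroed, dropping
// the low mantissa bits of the double before conversion.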
93IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
94 union {
95 u64 raw;
96 BitField<20, 14, s64> offset;
97 BitField<34, 5, u64> binding;
98 } const cbuf{insn};
99 if (cbuf.binding >= 18) {
100 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
101 }
102 if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
103 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
104 }
105 if (cbuf.offset % 2 != 0) {
106 throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
107 }
108 const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
109 const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
110 const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
111 const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
112 return v.ir.PackDouble2x32(vector);
113}
114
115void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
116 // F2I is used to convert from a floating point value to an integer
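    // The value is rounded according to the encoded rounding mode, clamped to
    // the range of the destination integer type, converted, and then patched
    // for NaN inputs (see the special cases below).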
117 const F2I f2i{insn};
118
119 const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
120 f2i.dest_format != DestFormat::I64};
121 IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
122 if (denorm_cares) {
123 fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
124 }
125 const IR::FpControl fp_control{
126 .no_contraction = true,
127 .rounding = IR::FpRounding::DontCare,
128 .fmz_mode = fmz_mode,
129 };
130 const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
131 const IR::F16F32F64 rounded_value{[&] {
132 switch (f2i.rounding) {
133 case Rounding::Round:
134 return v.ir.FPRoundEven(op_a, fp_control);
135 case Rounding::Floor:
136 return v.ir.FPFloor(op_a, fp_control);
137 case Rounding::Ceil:
138 return v.ir.FPCeil(op_a, fp_control);
139 case Rounding::Trunc:
140 return v.ir.FPTrunc(op_a, fp_control);
141 default:
142 throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
143 }
144 }()};
145 const bool is_signed{f2i.is_signed != 0};
146 const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
147
148 IR::F16F32F64 intermediate;
149 switch (f2i.src_format) {
150 case SrcFormat::F16: {
151 const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
152 const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
153 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
154 break;
155 }
156 case SrcFormat::F32: {
157 const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
158 const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
159 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
160 break;
161 }
162 case SrcFormat::F64: {
163 const IR::F64 max_val{v.ir.Imm64(max_bound)};
164 const IR::F64 min_val{v.ir.Imm64(min_bound)};
165 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
166 break;
167 }
168 default:
169        throw NotImplementedException("Invalid source format {}", f2i.src_format.Value());
170 }
171
172 const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
173 IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
174
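    // NaN handling: when exactly one of {source is F64, destination is I64}
    // holds, a NaN input yields the destination's sign-bit pattern (minimum
    // value); otherwise signed conversions of NaN yield zero. This appears to
    // match the hardware's behaviour for these cases.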
175 bool handled_special_case = false;
176 const bool special_nan_cases =
177 (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
178 if (special_nan_cases) {
179 if (f2i.dest_format == DestFormat::I32) {
180 handled_special_case = true;
181 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
182 } else if (f2i.dest_format == DestFormat::I64) {
183 handled_special_case = true;
184 result = IR::U64{
185 v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
186 }
187 }
188 if (!handled_special_case && is_signed) {
189 if (bitsize != 64) {
190 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
191 } else {
192 result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
193 }
194 }
195
196 if (bitsize == 64) {
197 v.L(f2i.dest_reg, result);
198 } else {
199 v.X(f2i.dest_reg, result);
200 }
201
202 if (f2i.cc != 0) {
203 throw NotImplementedException("F2I CC");
204 }
205}
206} // Anonymous namespace
207
208void TranslatorVisitor::F2I_reg(u64 insn) {
209 union {
210 u64 raw;
211 F2I base;
212 BitField<20, 8, IR::Reg> src_reg;
213 } const f2i{insn};
214
215 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
216 switch (f2i.base.src_format) {
217 case SrcFormat::F16:
218 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
219 case SrcFormat::F32:
220 return F(f2i.src_reg);
221 case SrcFormat::F64:
222 return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
223 default:
224 throw NotImplementedException("Invalid F2I source format {}",
225 f2i.base.src_format.Value());
226 }
227 }()};
228 TranslateF2I(*this, insn, op_a);
229}
230
231void TranslatorVisitor::F2I_cbuf(u64 insn) {
232 const F2I f2i{insn};
233 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
234 switch (f2i.src_format) {
235 case SrcFormat::F16:
236 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
237 case SrcFormat::F32:
238 return GetFloatCbuf(insn);
239 case SrcFormat::F64: {
240 return UnpackCbuf(*this, insn);
241 }
242 default:
243 throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
244 }
245 }()};
246 TranslateF2I(*this, insn, op_a);
247}
248
249void TranslatorVisitor::F2I_imm(u64) {
250 throw NotImplementedException("{}", Opcode::F2I_imm);
251}
252
253} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
13 bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const ffma{insn};
19
20 if (cc) {
21 throw NotImplementedException("FFMA CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
25 const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
26 const IR::FpControl fp_control{
27 .no_contraction = true,
28 .rounding = CastFpRounding(fp_rounding),
29 .fmz_mode = CastFmzMode(fmz_mode),
30 };
31 IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
32 if (fmz_mode == FmzMode::FMZ && !sat) {
33 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
34        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
35 const IR::F32 zero{v.ir.Imm32(0.0f)};
36 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
37 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
38 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
39 value = IR::F32{v.ir.Select(any_zero, op_c, value)};
40 }
41 if (sat) {
42 value = v.ir.FPSaturate(value);
43 }
44 v.F(ffma.dest_reg, value);
45}
46
47void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
48 union {
49 u64 raw;
50 BitField<47, 1, u64> cc;
51 BitField<48, 1, u64> neg_b;
52 BitField<49, 1, u64> neg_c;
53 BitField<50, 1, u64> sat;
54 BitField<51, 2, FpRounding> fp_rounding;
55 BitField<53, 2, FmzMode> fmz_mode;
56 } const ffma{insn};
57
58 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
59 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::FFMA_reg(u64 insn) {
64 FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
65}
66
67void TranslatorVisitor::FFMA_rc(u64 insn) {
68 FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
69}
70
71void TranslatorVisitor::FFMA_cr(u64 insn) {
72 FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
73}
74
75void TranslatorVisitor::FFMA_imm(u64 insn) {
76 FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
77}
78
79void TranslatorVisitor::FFMA32I(u64 insn) {
80 union {
81 u64 raw;
82        BitField<0, 8, IR::Reg> src_c; // FFMA32I reuses the destination register as the addend
83 BitField<52, 1, u64> cc;
84 BitField<53, 2, FmzMode> fmz_mode;
85 BitField<55, 1, u64> sat;
86 BitField<56, 1, u64> neg_a;
87 BitField<57, 1, u64> neg_c;
88 } const ffma32i{insn};
89
90 FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
91 ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
92}
93
94} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<44, 1, u64> ftz;
19 BitField<45, 1, u64> negate_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> negate_a;
23 BitField<49, 1, u64> abs_b;
24 } const fmnmx{insn};
25
26 if (fmnmx.cc) {
27 throw NotImplementedException("FMNMX CC");
28 }
29
30 const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
31 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
32 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
33
34 const IR::FpControl control{
35 .no_contraction = false,
36 .rounding = IR::FpRounding::DontCare,
37 .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
38 };
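    // Both the maximum and the minimum are computed; the predicate selects
    // between them, and neg_pred inverts the selection by swapping the two.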
39 IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
40 IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
41
42 if (fmnmx.neg_pred != 0) {
43 std::swap(min, max);
44 }
45
46 v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
47}
48} // Anonymous namespace
49
50void TranslatorVisitor::FMNMX_reg(u64 insn) {
51 FMNMX(*this, insn, GetFloatReg20(insn));
52}
53
54void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
55 FMNMX(*this, insn, GetFloatCbuf(insn));
56}
57
58void TranslatorVisitor::FMNMX_imm(u64 insn) {
59 FMNMX(*this, insn, GetFloatImm20(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class Operation : u64 {
14 Cos = 0,
15 Sin = 1,
16 Ex2 = 2, // Base 2 exponent
17 Lg2 = 3, // Base 2 logarithm
18 Rcp = 4, // Reciprocal
19 Rsq = 5, // Reciprocal square root
20 Rcp64H = 6, // 64-bit reciprocal
21 Rsq64H = 7, // 64-bit reciprocal square root
22 Sqrt = 8,
23};
24} // Anonymous namespace
25
26void TranslatorVisitor::MUFU(u64 insn) {
27 // MUFU is used to implement a bunch of special functions. See Operation.
28 union {
29 u64 raw;
30 BitField<0, 8, IR::Reg> dest_reg;
31 BitField<8, 8, IR::Reg> src_reg;
32 BitField<20, 4, Operation> operation;
33 BitField<46, 1, u64> abs;
34 BitField<48, 1, u64> neg;
35 BitField<50, 1, u64> sat;
36 } const mufu{insn};
37
38 const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
39 IR::F32 value{[&]() -> IR::F32 {
40 switch (mufu.operation) {
41 case Operation::Cos:
42 return ir.FPCos(op_a);
43 case Operation::Sin:
44 return ir.FPSin(op_a);
45 case Operation::Ex2:
46 return ir.FPExp2(op_a);
47 case Operation::Lg2:
48 return ir.FPLog2(op_a);
49 case Operation::Rcp:
50 return ir.FPRecip(op_a);
51 case Operation::Rsq:
52 return ir.FPRecipSqrt(op_a);
53 case Operation::Rcp64H:
54 throw NotImplementedException("MUFU.RCP64H");
55 case Operation::Rsq64H:
56 throw NotImplementedException("MUFU.RSQ64H");
57 case Operation::Sqrt:
58 return ir.FPSqrt(op_a);
59 default:
60 throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
61 }
62 }()};
63
64 if (mufu.sat) {
65 value = ir.FPSaturate(value);
66 }
67
68 F(mufu.dest_reg, value);
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/modifiers.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Scale : u64 {
15 None,
16 D2,
17 D4,
18 D8,
19 M8,
20 M4,
21 M2,
22 INVALIDSCALE37,
23};
24
25float ScaleFactor(Scale scale) {
26 switch (scale) {
27 case Scale::None:
28 return 1.0f;
29 case Scale::D2:
30 return 1.0f / 2.0f;
31 case Scale::D4:
32 return 1.0f / 4.0f;
33 case Scale::D8:
34 return 1.0f / 8.0f;
35 case Scale::M8:
36 return 8.0f;
37 case Scale::M4:
38 return 4.0f;
39 case Scale::M2:
40 return 2.0f;
41 case Scale::INVALIDSCALE37:
42 break;
43 }
44 throw NotImplementedException("Invalid FMUL scale {}", scale);
45}
46
47void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
48 FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
49 union {
50 u64 raw;
51 BitField<0, 8, IR::Reg> dest_reg;
52 BitField<8, 8, IR::Reg> src_a;
53 } const fmul{insn};
54
55 if (cc) {
56 throw NotImplementedException("FMUL CC");
57 }
58 IR::F32 op_a{v.F(fmul.src_a)};
59 if (scale != Scale::None) {
60 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
61 throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
62 }
63 op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
64 }
65 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
66 const IR::FpControl fp_control{
67 .no_contraction = true,
68 .rounding = CastFpRounding(fp_rounding),
69 .fmz_mode = CastFmzMode(fmz_mode),
70 };
71 IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
72 if (fmz_mode == FmzMode::FMZ && !sat) {
73 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
74        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
75 const IR::F32 zero{v.ir.Imm32(0.0f)};
76 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
77 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
78 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
79 value = IR::F32{v.ir.Select(any_zero, zero, value)};
80 }
81 if (sat) {
82 value = v.ir.FPSaturate(value);
83 }
84 v.F(fmul.dest_reg, value);
85}
86
87void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
88 union {
89 u64 raw;
90 BitField<39, 2, FpRounding> fp_rounding;
91 BitField<41, 3, Scale> scale;
92 BitField<44, 2, FmzMode> fmz;
93 BitField<47, 1, u64> cc;
94 BitField<48, 1, u64> neg_b;
95 BitField<50, 1, u64> sat;
96 } const fmul{insn};
97
98 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
99 fmul.neg_b != 0);
100}
101} // Anonymous namespace
102
103void TranslatorVisitor::FMUL_reg(u64 insn) {
104 return FMUL(*this, insn, GetFloatReg20(insn));
105}
106
107void TranslatorVisitor::FMUL_cbuf(u64 insn) {
108 return FMUL(*this, insn, GetFloatCbuf(insn));
109}
110
111void TranslatorVisitor::FMUL_imm(u64 insn) {
112 return FMUL(*this, insn, GetFloatImm20(insn));
113}
114
115void TranslatorVisitor::FMUL32I(u64 insn) {
116 union {
117 u64 raw;
118 BitField<52, 1, u64> cc;
119 BitField<53, 2, FmzMode> fmz;
120 BitField<55, 1, u64> sat;
121 } const fmul32i{insn};
122
123 FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
124 fmul32i.sat != 0, fmul32i.cc != 0, false);
125}
126
127} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 SINCOS,
13 EX2,
14};
15
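// RRO (range reduction operator) is translated as a plain move with abs/neg
// applied; no range reduction is performed, presumably because the MUFU
// lowerings used by the IR do not require pre-reduced operands.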
16void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
17 union {
18 u64 raw;
19 BitField<0, 8, IR::Reg> dest_reg;
20 BitField<39, 1, Mode> mode;
21 BitField<45, 1, u64> neg;
22 BitField<49, 1, u64> abs;
23 } const rro{insn};
24
25 v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
26}
27} // Anonymous namespace
28
29void TranslatorVisitor::RRO_reg(u64 insn) {
30 RRO(*this, insn, GetFloatReg20(insn));
31}
32
33void TranslatorVisitor::RRO_cbuf(u64 insn) {
34 RRO(*this, insn, GetFloatCbuf(insn));
35}
36
37void TranslatorVisitor::RRO_imm(u64) {
38 throw NotImplementedException("RRO (imm)");
39}
40
41} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<47, 1, u64> ftz;
26 BitField<48, 4, FPCompareOp> compare_op;
27 } const fsetp{insn};
28
29 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
30 const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
31 const IR::FpControl control{
32 .no_contraction = false,
33 .rounding = IR::FpRounding::DontCare,
34 .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
35 };
36
37 const BooleanOp bop{fsetp.bop};
38 const FPCompareOp compare_op{fsetp.compare_op};
39 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
40 const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
41 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
42 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
43 v.ir.SetPred(fsetp.dest_pred_a, result_a);
44 v.ir.SetPred(fsetp.dest_pred_b, result_b);
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::FSETP_reg(u64 insn) {
49 FSETP(*this, insn, GetFloatReg20(insn));
50}
51
52void TranslatorVisitor::FSETP_cbuf(u64 insn) {
53 FSETP(*this, insn, GetFloatCbuf(insn));
54}
55
56void TranslatorVisitor::FSETP_imm(u64 insn) {
57 FSETP(*this, insn, GetFloatImm20(insn));
58}
59
60} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::FSWZADD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<28, 8, u64> swizzle;
16 BitField<38, 1, u64> ndv;
17 BitField<39, 2, FpRounding> round;
18 BitField<44, 1, u64> ftz;
19 BitField<47, 1, u64> cc;
20 } const fswzadd{insn};
21
22 if (fswzadd.ndv != 0) {
23 throw NotImplementedException("FSWZADD NDV");
24 }
25
26 const IR::F32 src_a{GetFloatReg8(insn)};
27 const IR::F32 src_b{GetFloatReg20(insn)};
28 const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
29
30 const IR::FpControl fp_control{
31 .no_contraction = false,
32 .rounding = CastFpRounding(fswzadd.round),
33 .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
34 };
35
36 const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
37 F(fswzadd.dest_reg, result);
38
39 if (fswzadd.cc != 0) {
40 throw NotImplementedException("FSWZADD CC");
41 }
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
11 union {
12 u64 raw;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a;
15 } const hadd2{insn};
16
17 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
18 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
19 const bool promotion{lhs_a.Type() != lhs_b.Type()};
20 if (promotion) {
21 if (lhs_a.Type() == IR::Type::F16) {
22 lhs_a = v.ir.FPConvert(32, lhs_a);
23 rhs_a = v.ir.FPConvert(32, rhs_a);
24 }
25 if (lhs_b.Type() == IR::Type::F16) {
26 lhs_b = v.ir.FPConvert(32, lhs_b);
27 rhs_b = v.ir.FPConvert(32, rhs_b);
28 }
29 }
30 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
31 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
32
33 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
34 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
35
36 const IR::FpControl fp_control{
37 .no_contraction = true,
38 .rounding = IR::FpRounding::DontCare,
39 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
40 };
41 IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
42 IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
43 if (sat) {
44 lhs = v.ir.FPSaturate(lhs);
45 rhs = v.ir.FPSaturate(rhs);
46 }
47 if (promotion) {
48 lhs = v.ir.FPConvert(16, lhs);
49 rhs = v.ir.FPConvert(16, rhs);
50 }
51 v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
52}
53
54void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
55 const IR::U32& src_b) {
56 union {
57 u64 raw;
58 BitField<49, 2, Merge> merge;
59 BitField<39, 1, u64> ftz;
60 BitField<43, 1, u64> neg_a;
61 BitField<44, 1, u64> abs_a;
62 BitField<47, 2, Swizzle> swizzle_a;
63 } const hadd2{insn};
64
65 HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
66 hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
67}
68} // Anonymous namespace
69
70void TranslatorVisitor::HADD2_reg(u64 insn) {
71 union {
72 u64 raw;
73 BitField<32, 1, u64> sat;
74 BitField<31, 1, u64> neg_b;
75 BitField<30, 1, u64> abs_b;
76 BitField<28, 2, Swizzle> swizzle_b;
77 } const hadd2{insn};
78
79 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
80 GetReg20(insn));
81}
82
83void TranslatorVisitor::HADD2_cbuf(u64 insn) {
84 union {
85 u64 raw;
86 BitField<52, 1, u64> sat;
87 BitField<56, 1, u64> neg_b;
88 BitField<54, 1, u64> abs_b;
89 } const hadd2{insn};
90
91 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
92 GetCbuf(insn));
93}
94
95void TranslatorVisitor::HADD2_imm(u64 insn) {
96 union {
97 u64 raw;
98 BitField<52, 1, u64> sat;
99 BitField<56, 1, u64> neg_high;
100 BitField<30, 9, u64> high;
101 BitField<29, 1, u64> neg_low;
102 BitField<20, 9, u64> low;
103 } const hadd2{insn};
104
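    // Each packed half is encoded as a sign bit plus the upper 9 bits of an
    // fp16 value (exponent and high mantissa); shifts of 6 (low half) and 22
    // (high half) rebuild the halves with the low mantissa bits zeroed.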
105 const u32 imm{
106 static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
107 static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
108 HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
109}
110
111void TranslatorVisitor::HADD2_32I(u64 insn) {
112 union {
113 u64 raw;
114 BitField<55, 1, u64> ftz;
115 BitField<52, 1, u64> sat;
116 BitField<56, 1, u64> neg_a;
117 BitField<53, 2, Swizzle> swizzle_a;
118 BitField<20, 32, u64> imm32;
119 } const hadd2{insn};
120
121 const u32 imm{static_cast<u32>(hadd2.imm32)};
122 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
123 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
124}
125} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
10 Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
11 bool sat, HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hfma2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
21 const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
22 if (promotion) {
23 if (lhs_a.Type() == IR::Type::F16) {
24 lhs_a = v.ir.FPConvert(32, lhs_a);
25 rhs_a = v.ir.FPConvert(32, rhs_a);
26 }
27 if (lhs_b.Type() == IR::Type::F16) {
28 lhs_b = v.ir.FPConvert(32, lhs_b);
29 rhs_b = v.ir.FPConvert(32, rhs_b);
30 }
31 if (lhs_c.Type() == IR::Type::F16) {
32 lhs_c = v.ir.FPConvert(32, lhs_c);
33 rhs_c = v.ir.FPConvert(32, rhs_c);
34 }
35 }
36
37 lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
38 rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
39
40 lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
41 rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
42
43 const IR::FpControl fp_control{
44 .no_contraction = true,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = HalfPrecision2FmzMode(precision),
47 };
48 IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
49 IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
50 if (precision == HalfPrecision::FMZ && !sat) {
51 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
52        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
53 const IR::F32 zero{v.ir.Imm32(0.0f)};
54 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
55 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
56 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
57 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
58
59 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
60 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
61 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
62 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
63 }
64 if (sat) {
65 lhs = v.ir.FPSaturate(lhs);
66 rhs = v.ir.FPSaturate(rhs);
67 }
68 if (promotion) {
69 lhs = v.ir.FPConvert(16, lhs);
70 rhs = v.ir.FPConvert(16, rhs);
71 }
72 v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
73}
74
75void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
76 Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
77 HalfPrecision precision) {
78 union {
79 u64 raw;
80 BitField<47, 2, Swizzle> swizzle_a;
81 BitField<49, 2, Merge> merge;
82 } const hfma2{insn};
83
84 HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
85 sat, precision);
86}
87} // Anonymous namespace
88
89void TranslatorVisitor::HFMA2_reg(u64 insn) {
90 union {
91 u64 raw;
92 BitField<28, 2, Swizzle> swizzle_b;
93 BitField<32, 1, u64> saturate;
94 BitField<31, 1, u64> neg_b;
95 BitField<30, 1, u64> neg_c;
96 BitField<35, 2, Swizzle> swizzle_c;
97 BitField<37, 2, HalfPrecision> precision;
98 } const hfma2{insn};
99
100 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
101 GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
102}
103
104void TranslatorVisitor::HFMA2_rc(u64 insn) {
105 union {
106 u64 raw;
107 BitField<51, 1, u64> neg_c;
108 BitField<52, 1, u64> saturate;
109 BitField<53, 2, Swizzle> swizzle_b;
110 BitField<56, 1, u64> neg_b;
111 BitField<57, 2, HalfPrecision> precision;
112 } const hfma2{insn};
113
114 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
115 GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
116}
117
118void TranslatorVisitor::HFMA2_cr(u64 insn) {
119 union {
120 u64 raw;
121 BitField<51, 1, u64> neg_c;
122 BitField<52, 1, u64> saturate;
123 BitField<53, 2, Swizzle> swizzle_c;
124 BitField<56, 1, u64> neg_b;
125 BitField<57, 2, HalfPrecision> precision;
126 } const hfma2{insn};
127
128 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
129 GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
130}
131
132void TranslatorVisitor::HFMA2_imm(u64 insn) {
133 union {
134 u64 raw;
135 BitField<51, 1, u64> neg_c;
136 BitField<52, 1, u64> saturate;
137 BitField<53, 2, Swizzle> swizzle_c;
138
139 BitField<56, 1, u64> neg_high;
140 BitField<30, 9, u64> high;
141 BitField<29, 1, u64> neg_low;
142 BitField<20, 9, u64> low;
143 BitField<57, 2, HalfPrecision> precision;
144 } const hfma2{insn};
145
146 const u32 imm{
147 static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
148 static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
149
150 HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
151 GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
152}
153
154void TranslatorVisitor::HFMA2_32I(u64 insn) {
155 union {
156 u64 raw;
157 BitField<0, 8, IR::Reg> src_c;
158 BitField<20, 32, u64> imm32;
159 BitField<52, 1, u64> neg_c;
160 BitField<53, 2, Swizzle> swizzle_a;
161 BitField<55, 2, HalfPrecision> precision;
162 } const hfma2{insn};
163
164 const u32 imm{static_cast<u32>(hfma2.imm32)};
165 HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
166 Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
167}
168
169} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8
9IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
10 switch (precision) {
11 case HalfPrecision::None:
12 return IR::FmzMode::None;
13 case HalfPrecision::FTZ:
14 return IR::FmzMode::FTZ;
15 case HalfPrecision::FMZ:
16 return IR::FmzMode::FMZ;
17 default:
18 return IR::FmzMode::DontCare;
19 }
20}
21
22std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
23 switch (swizzle) {
24 case Swizzle::H1_H0: {
25 const IR::Value vector{ir.UnpackFloat2x16(value)};
26 return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
27 }
28 case Swizzle::H0_H0: {
29 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
30 return {scalar, scalar};
31 }
32 case Swizzle::H1_H1: {
33 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
34 return {scalar, scalar};
35 }
36 case Swizzle::F32: {
37 const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
38 return {scalar, scalar};
39 }
40 }
41 throw InvalidArgument("Invalid swizzle {}", swizzle);
42}
43
44IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
45 Merge merge) {
46 switch (merge) {
47 case Merge::H1_H0:
48 return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
49 case Merge::F32:
50 return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
51 case Merge::MRG_H0:
52 case Merge::MRG_H1: {
53 const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
54 const bool is_h0{merge == Merge::MRG_H0};
55 const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
56 return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
57 }
58 }
59 throw InvalidArgument("Invalid merge {}", merge);
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14
15enum class Merge : u64 {
16 H1_H0,
17 F32,
18 MRG_H0,
19 MRG_H1,
20};
21
22enum class Swizzle : u64 {
23 H1_H0,
24 F32,
25 H0_H0,
26 H1_H1,
27};
28
29enum class HalfPrecision : u64 {
30 None = 0,
31 FTZ = 1,
32 FMZ = 2,
33};
34
35IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
36
37std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
38
39IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
40 Merge merge);
41
42} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
11 HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hmul2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 const bool promotion{lhs_a.Type() != lhs_b.Type()};
21 if (promotion) {
22 if (lhs_a.Type() == IR::Type::F16) {
23 lhs_a = v.ir.FPConvert(32, lhs_a);
24 rhs_a = v.ir.FPConvert(32, rhs_a);
25 }
26 if (lhs_b.Type() == IR::Type::F16) {
27 lhs_b = v.ir.FPConvert(32, lhs_b);
28 rhs_b = v.ir.FPConvert(32, rhs_b);
29 }
30 }
31 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
32 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
33
34 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
35 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
36
37 const IR::FpControl fp_control{
38 .no_contraction = true,
39 .rounding = IR::FpRounding::DontCare,
40 .fmz_mode = HalfPrecision2FmzMode(precision),
41 };
42 IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
43 IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
44 if (precision == HalfPrecision::FMZ && !sat) {
45 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
46        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
47 const IR::F32 zero{v.ir.Imm32(0.0f)};
48 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
49 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
50 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
51 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
52
53 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
54 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
55 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
56 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
57 }
58 if (sat) {
59 lhs = v.ir.FPSaturate(lhs);
60 rhs = v.ir.FPSaturate(rhs);
61 }
62 if (promotion) {
63 lhs = v.ir.FPConvert(16, lhs);
64 rhs = v.ir.FPConvert(16, rhs);
65 }
66 v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
67}
68
69void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
70 Swizzle swizzle_b, const IR::U32& src_b) {
71 union {
72 u64 raw;
73 BitField<49, 2, Merge> merge;
74 BitField<47, 2, Swizzle> swizzle_a;
75 BitField<39, 2, HalfPrecision> precision;
76 } const hmul2{insn};
77
78 HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
79 hmul2.precision);
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::HMUL2_reg(u64 insn) {
84 union {
85 u64 raw;
86 BitField<32, 1, u64> sat;
87 BitField<31, 1, u64> neg_b;
88 BitField<30, 1, u64> abs_b;
89 BitField<44, 1, u64> abs_a;
90 BitField<28, 2, Swizzle> swizzle_b;
91 } const hmul2{insn};
92
93 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
94 hmul2.swizzle_b, GetReg20(insn));
95}
96
97void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
98 union {
99 u64 raw;
100 BitField<52, 1, u64> sat;
101 BitField<54, 1, u64> abs_b;
102 BitField<43, 1, u64> neg_a;
103 BitField<44, 1, u64> abs_a;
104 } const hmul2{insn};
105
106 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
107 Swizzle::F32, GetCbuf(insn));
108}
109
110void TranslatorVisitor::HMUL2_imm(u64 insn) {
111 union {
112 u64 raw;
113 BitField<52, 1, u64> sat;
114 BitField<56, 1, u64> neg_high;
115 BitField<30, 9, u64> high;
116 BitField<29, 1, u64> neg_low;
117 BitField<20, 9, u64> low;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 } const hmul2{insn};
121
122 const u32 imm{
123 static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
124 static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
125 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
126 Swizzle::H1_H0, ir.Imm32(imm));
127}
128
129void TranslatorVisitor::HMUL2_32I(u64 insn) {
130 union {
131 u64 raw;
132 BitField<55, 2, HalfPrecision> precision;
133 BitField<52, 1, u64> sat;
134 BitField<53, 2, Swizzle> swizzle_a;
135 BitField<20, 32, u64> imm32;
136 } const hmul2{insn};
137
138 const u32 imm{static_cast<u32>(hmul2.imm32)};
139 HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
140 Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
141}
142
143} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
10 bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
11 union {
12 u64 insn;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a_reg;
15 BitField<39, 3, IR::Pred> pred;
16 BitField<42, 1, u64> neg_pred;
17 BitField<43, 1, u64> neg_a;
18 BitField<45, 2, BooleanOp> bop;
19 BitField<44, 1, u64> abs_a;
20 BitField<47, 2, Swizzle> swizzle_a;
21 } const hset2{insn};
22
23 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
24 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
25
26 if (lhs_a.Type() != lhs_b.Type()) {
27 if (lhs_a.Type() == IR::Type::F16) {
28 lhs_a = v.ir.FPConvert(32, lhs_a);
29 rhs_a = v.ir.FPConvert(32, rhs_a);
30 }
31 if (lhs_b.Type() == IR::Type::F16) {
32 lhs_b = v.ir.FPConvert(32, lhs_b);
33 rhs_b = v.ir.FPConvert(32, rhs_b);
34 }
35 }
36
37 lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
38 rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
39
40 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
41 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
42
43 const IR::FpControl control{
44 .no_contraction = false,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
47 };
48
49 IR::U1 pred{v.ir.GetPred(hset2.pred)};
50 if (hset2.neg_pred != 0) {
51 pred = v.ir.LogicalNot(pred);
52 }
53 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
54 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
55 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
56 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
57
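    // BF selects a "boolean float" result: 0x3c00 is 1.0 in fp16; otherwise an
    // all-ones 16-bit mask is written for a passing comparison.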
58 const u32 true_value = bf ? 0x3c00 : 0xffff;
59 const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
60 const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
61 const IR::U32 fail_result{v.ir.Imm32(0)};
62 const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
63 const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
64
65 v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
66}
67} // Anonymous namespace
68
69void TranslatorVisitor::HSET2_reg(u64 insn) {
70 union {
71 u64 insn;
72 BitField<30, 1, u64> abs_b;
73 BitField<49, 1, u64> bf;
74 BitField<31, 1, u64> neg_b;
75 BitField<50, 1, u64> ftz;
76 BitField<35, 4, FPCompareOp> compare_op;
77 BitField<28, 2, Swizzle> swizzle_b;
78 } const hset2{insn};
79
80 HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
81 hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
82}
83
84void TranslatorVisitor::HSET2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> bf;
88 BitField<56, 1, u64> neg_b;
89 BitField<54, 1, u64> ftz;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hset2{insn};
92
93 HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
94 hset2.compare_op, Swizzle::F32);
95}
96
97void TranslatorVisitor::HSET2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> bf;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hset2{insn};
108
109 const u32 imm{
110 static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
112
113 HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
114 Swizzle::H1_H0);
115}
116
117} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
10 Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
11 union {
12 u64 insn;
13 BitField<8, 8, IR::Reg> src_a_reg;
14 BitField<3, 3, IR::Pred> dest_pred_a;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 1, u64> neg_a;
19 BitField<45, 2, BooleanOp> bop;
20 BitField<44, 1, u64> abs_a;
21 BitField<6, 1, u64> ftz;
22 BitField<47, 2, Swizzle> swizzle_a;
23 } const hsetp2{insn};
24
25 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
26 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
27
28 if (lhs_a.Type() != lhs_b.Type()) {
29 if (lhs_a.Type() == IR::Type::F16) {
30 lhs_a = v.ir.FPConvert(32, lhs_a);
31 rhs_a = v.ir.FPConvert(32, rhs_a);
32 }
33 if (lhs_b.Type() == IR::Type::F16) {
34 lhs_b = v.ir.FPConvert(32, lhs_b);
35 rhs_b = v.ir.FPConvert(32, rhs_b);
36 }
37 }
38
39 lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
40 rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
41
42 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
43 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
44
45 const IR::FpControl control{
46 .no_contraction = false,
47 .rounding = IR::FpRounding::DontCare,
48 .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
49 };
50
51 IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
52 if (hsetp2.neg_pred != 0) {
53 pred = v.ir.LogicalNot(pred);
54 }
55 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
56 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
57 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
58 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
59
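    // With H_AND the lane results are combined: pred_a receives the AND of
    // both comparisons and pred_b its negation; otherwise each lane writes its
    // own predicate.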
60 if (h_and) {
61 auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
62 v.ir.SetPred(hsetp2.dest_pred_a, result);
63 v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
64 } else {
65 v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
66 v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
67 }
68}
69} // Anonymous namespace
70
71void TranslatorVisitor::HSETP2_reg(u64 insn) {
72 union {
73 u64 insn;
74 BitField<30, 1, u64> abs_b;
75 BitField<49, 1, u64> h_and;
76 BitField<31, 1, u64> neg_b;
77 BitField<35, 4, FPCompareOp> compare_op;
78 BitField<28, 2, Swizzle> swizzle_b;
79 } const hsetp2{insn};
80 HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
81 hsetp2.compare_op, hsetp2.h_and != 0);
82}
83
84void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> h_and;
88 BitField<54, 1, u64> abs_b;
89 BitField<56, 1, u64> neg_b;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hsetp2{insn};
92
93 HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
94 hsetp2.compare_op, hsetp2.h_and != 0);
95}
96
97void TranslatorVisitor::HSETP2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> h_and;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hsetp2{insn};
108
109 const u32 imm{static_cast<u32>(hsetp2.low << 6) |
110 static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hsetp2.high << 22) |
112 static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
113
114 HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
115 hsetp2.h_and != 0);
116}
117
118} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
12 u32 offset) {
13 if (unaligned) {
14 return ir.Imm32(0);
15 }
16 return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
17}
18} // Anonymous namespace
19
20IR::U32 TranslatorVisitor::X(IR::Reg reg) {
21 return ir.GetReg(reg);
22}
23
24IR::U64 TranslatorVisitor::L(IR::Reg reg) {
25 if (!IR::IsAligned(reg, 2)) {
26 throw NotImplementedException("Unaligned source register {}", reg);
27 }
28 return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
29}
30
31IR::F32 TranslatorVisitor::F(IR::Reg reg) {
32 return ir.BitCast<IR::F32>(X(reg));
33}
34
35IR::F64 TranslatorVisitor::D(IR::Reg reg) {
36 if (!IR::IsAligned(reg, 2)) {
37 throw NotImplementedException("Unaligned source register {}", reg);
38 }
39 return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
40}
41
42void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
43 ir.SetReg(dest_reg, value);
44}
45
46void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
47 if (!IR::IsAligned(dest_reg, 2)) {
48 throw NotImplementedException("Unaligned destination register {}", dest_reg);
49 }
50 const IR::Value result{ir.UnpackUint2x32(value)};
51 for (int i = 0; i < 2; i++) {
52 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
53 }
54}
55
56void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
57 X(dest_reg, ir.BitCast<IR::U32>(value));
58}
59
60void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
61 if (!IR::IsAligned(dest_reg, 2)) {
62 throw NotImplementedException("Unaligned destination register {}", dest_reg);
63 }
64 const IR::Value result{ir.UnpackDouble2x32(value)};
65 for (int i = 0; i < 2; i++) {
66 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
67 }
68}
69
70IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
71 union {
72 u64 raw;
73 BitField<8, 8, IR::Reg> index;
74 } const reg{insn};
75 return X(reg.index);
76}
77
78IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
79 union {
80 u64 raw;
81 BitField<20, 8, IR::Reg> index;
82 } const reg{insn};
83 return X(reg.index);
84}
85
86IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
87 union {
88 u64 raw;
89 BitField<39, 8, IR::Reg> index;
90 } const reg{insn};
91 return X(reg.index);
92}
93
94IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
95 return ir.BitCast<IR::F32>(GetReg8(insn));
96}
97
98IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
99 return ir.BitCast<IR::F32>(GetReg20(insn));
100}
101
102IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
103 return ir.BitCast<IR::F32>(GetReg39(insn));
104}
105
106IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
107 union {
108 u64 raw;
109 BitField<20, 8, IR::Reg> index;
110 } const reg{insn};
111 return D(reg.index);
112}
113
114IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
115 union {
116 u64 raw;
117 BitField<39, 8, IR::Reg> index;
118 } const reg{insn};
119 return D(reg.index);
120}
121
122static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
123 union {
124 u64 raw;
125 BitField<20, 14, u64> offset;
126 BitField<34, 5, u64> binding;
127 } const cbuf{insn};
128
129 if (cbuf.binding >= 18) {
130 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
131 }
132 if (cbuf.offset >= 0x10'000) {
133 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
134 }
135 const IR::Value binding{static_cast<u32>(cbuf.binding)};
136 const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
137 return {IR::U32{binding}, IR::U32{byte_offset}};
138}
139
140IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
141 const auto [binding, byte_offset]{CbufAddr(insn)};
142 return ir.GetCbuf(binding, byte_offset);
143}
144
145IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
146 const auto [binding, byte_offset]{CbufAddr(insn)};
147 return ir.GetFloatCbuf(binding, byte_offset);
148}
149
150IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
151 union {
152 u64 raw;
153 BitField<20, 1, u64> unaligned;
154 } const cbuf{insn};
155
156 const auto [binding, offset_value]{CbufAddr(insn)};
157 const bool unaligned{cbuf.unaligned != 0};
158 const u32 offset{offset_value.U32()};
159 const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
160
161 const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
162 const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
163 return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
164}
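// Read pattern used above: in the aligned case the upper word comes from ((offset & ~7) | 4) and
// the lower word from `offset` via CbufLowerBits; when the encoding flags the access as
// unaligned, only the word at (offset | 4) is fetched and the low 32 bits of the double are zero.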
165
166IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
167 union {
168 u64 raw;
169 BitField<20, 1, u64> unaligned;
170 } const cbuf{insn};
171
172 if (cbuf.unaligned != 0) {
173 throw NotImplementedException("Unaligned packed constant buffer read");
174 }
175 const auto [binding, lower_offset]{CbufAddr(insn)};
176 const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
177 const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
178 const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
179 return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
180}
181
182IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
183 union {
184 u64 raw;
185 BitField<20, 19, u64> value;
186 BitField<56, 1, u64> is_negative;
187 } const imm{insn};
188
189 if (imm.is_negative != 0) {
190 const s64 raw{static_cast<s64>(imm.value)};
191 return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
192 } else {
193 return ir.Imm32(static_cast<u32>(imm.value));
194 }
195}
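// Worked example of the signed path above: value = 1 with is_negative set yields
// -(1 << 19) + 1 = -524287, i.e. the 19-bit field provides the low bits of a 20-bit
// two's-complement immediate whose sign bit is stored at bit 56 of the instruction.
// Host-side sketch of the same decode (illustrative only, not part of the translator):
//   s32 DecodeImm20(u64 insn) {
//       const u32 value = static_cast<u32>((insn >> 20) & 0x7ffff); // 19-bit magnitude
//       const bool neg = ((insn >> 56) & 1) != 0;
//       return neg ? static_cast<s32>(value) - (1 << 19) : static_cast<s32>(value);
//   }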
196
197IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
198 union {
199 u64 raw;
200 BitField<20, 19, u64> value;
201 BitField<56, 1, u64> is_negative;
202 } const imm{insn};
203 const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
204 const u32 value{static_cast<u32>(imm.value) << 12};
205 return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
206}
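// The float immediate above places the 19-bit field in bits [30:12] of the f32 (sign from bit
// 56), covering the 8-bit exponent and the top 11 mantissa bits; the remaining mantissa bits are
// implicitly zero.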
207
208IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
209 union {
210 u64 raw;
211 BitField<20, 19, u64> value;
212 BitField<56, 1, u64> is_negative;
213 } const imm{insn};
214 const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
215 const u64 value{imm.value << 44};
216 return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
217}
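// Same idea for doubles: value << 44 fills bits [62:44] of the f64 (the 11-bit exponent plus the
// top 8 mantissa bits), with the sign taken from bit 56 of the instruction.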
218
219IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
220 const s64 value{GetImm20(insn).U32()};
221 return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
222}
223
224IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
225 union {
226 u64 raw;
227 BitField<20, 32, u64> value;
228 } const imm{insn};
229 return ir.Imm32(static_cast<u32>(imm.value));
230}
231
232IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
233 union {
234 u64 raw;
235 BitField<20, 32, u64> value;
236 } const imm{insn};
237 return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
238}
239
240void TranslatorVisitor::SetZFlag(const IR::U1& value) {
241 ir.SetZFlag(value);
242}
243
244void TranslatorVisitor::SetSFlag(const IR::U1& value) {
245 ir.SetSFlag(value);
246}
247
248void TranslatorVisitor::SetCFlag(const IR::U1& value) {
249 ir.SetCFlag(value);
250}
251
252void TranslatorVisitor::SetOFlag(const IR::U1& value) {
253 ir.SetOFlag(value);
254}
255
256void TranslatorVisitor::ResetZero() {
257 SetZFlag(ir.Imm1(false));
258}
259
260void TranslatorVisitor::ResetSFlag() {
261 SetSFlag(ir.Imm1(false));
262}
263
264void TranslatorVisitor::ResetCFlag() {
265 SetCFlag(ir.Imm1(false));
266}
267
268void TranslatorVisitor::ResetOFlag() {
269 SetOFlag(ir.Imm1(false));
270}
271
272} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/maxwell/instruction.h"
11
12namespace Shader::Maxwell {
13
14enum class CompareOp : u64 {
15 False,
16 LessThan,
17 Equal,
18 LessThanEqual,
19 GreaterThan,
20 NotEqual,
21 GreaterThanEqual,
22 True,
23};
24
25enum class BooleanOp : u64 {
26 AND,
27 OR,
28 XOR,
29};
30
31enum class PredicateOp : u64 {
32 False,
33 True,
34 Zero,
35 NonZero,
36};
37
38enum class FPCompareOp : u64 {
39 F,
40 LT,
41 EQ,
42 LE,
43 GT,
44 NE,
45 GE,
46 NUM,
47 Nan,
48 LTU,
49 EQU,
50 LEU,
51 GTU,
52 NEU,
53 GEU,
54 T,
55};
56
57class TranslatorVisitor {
58public:
59 explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
60
61 Environment& env;
62 IR::IREmitter ir;
63
64 void AL2P(u64 insn);
65 void ALD(u64 insn);
66 void AST(u64 insn);
67 void ATOM_cas(u64 insn);
68 void ATOM(u64 insn);
69 void ATOMS_cas(u64 insn);
70 void ATOMS(u64 insn);
71 void B2R(u64 insn);
72 void BAR(u64 insn);
73 void BFE_reg(u64 insn);
74 void BFE_cbuf(u64 insn);
75 void BFE_imm(u64 insn);
76 void BFI_reg(u64 insn);
77 void BFI_rc(u64 insn);
78 void BFI_cr(u64 insn);
79 void BFI_imm(u64 insn);
80 void BPT(u64 insn);
81 void BRA(u64 insn);
82 void BRK(u64 insn);
83 void BRX(u64 insn);
84 void CAL();
85 void CCTL(u64 insn);
86 void CCTLL(u64 insn);
87 void CONT(u64 insn);
88 void CS2R(u64 insn);
89 void CSET(u64 insn);
90 void CSETP(u64 insn);
91 void DADD_reg(u64 insn);
92 void DADD_cbuf(u64 insn);
93 void DADD_imm(u64 insn);
94 void DEPBAR();
95 void DFMA_reg(u64 insn);
96 void DFMA_rc(u64 insn);
97 void DFMA_cr(u64 insn);
98 void DFMA_imm(u64 insn);
99 void DMNMX_reg(u64 insn);
100 void DMNMX_cbuf(u64 insn);
101 void DMNMX_imm(u64 insn);
102 void DMUL_reg(u64 insn);
103 void DMUL_cbuf(u64 insn);
104 void DMUL_imm(u64 insn);
105 void DSET_reg(u64 insn);
106 void DSET_cbuf(u64 insn);
107 void DSET_imm(u64 insn);
108 void DSETP_reg(u64 insn);
109 void DSETP_cbuf(u64 insn);
110 void DSETP_imm(u64 insn);
111 void EXIT();
112 void F2F_reg(u64 insn);
113 void F2F_cbuf(u64 insn);
114 void F2F_imm(u64 insn);
115 void F2I_reg(u64 insn);
116 void F2I_cbuf(u64 insn);
117 void F2I_imm(u64 insn);
118 void FADD_reg(u64 insn);
119 void FADD_cbuf(u64 insn);
120 void FADD_imm(u64 insn);
121 void FADD32I(u64 insn);
122 void FCHK_reg(u64 insn);
123 void FCHK_cbuf(u64 insn);
124 void FCHK_imm(u64 insn);
125 void FCMP_reg(u64 insn);
126 void FCMP_rc(u64 insn);
127 void FCMP_cr(u64 insn);
128 void FCMP_imm(u64 insn);
129 void FFMA_reg(u64 insn);
130 void FFMA_rc(u64 insn);
131 void FFMA_cr(u64 insn);
132 void FFMA_imm(u64 insn);
133 void FFMA32I(u64 insn);
134 void FLO_reg(u64 insn);
135 void FLO_cbuf(u64 insn);
136 void FLO_imm(u64 insn);
137 void FMNMX_reg(u64 insn);
138 void FMNMX_cbuf(u64 insn);
139 void FMNMX_imm(u64 insn);
140 void FMUL_reg(u64 insn);
141 void FMUL_cbuf(u64 insn);
142 void FMUL_imm(u64 insn);
143 void FMUL32I(u64 insn);
144 void FSET_reg(u64 insn);
145 void FSET_cbuf(u64 insn);
146 void FSET_imm(u64 insn);
147 void FSETP_reg(u64 insn);
148 void FSETP_cbuf(u64 insn);
149 void FSETP_imm(u64 insn);
150 void FSWZADD(u64 insn);
151 void GETCRSPTR(u64 insn);
152 void GETLMEMBASE(u64 insn);
153 void HADD2_reg(u64 insn);
154 void HADD2_cbuf(u64 insn);
155 void HADD2_imm(u64 insn);
156 void HADD2_32I(u64 insn);
157 void HFMA2_reg(u64 insn);
158 void HFMA2_rc(u64 insn);
159 void HFMA2_cr(u64 insn);
160 void HFMA2_imm(u64 insn);
161 void HFMA2_32I(u64 insn);
162 void HMUL2_reg(u64 insn);
163 void HMUL2_cbuf(u64 insn);
164 void HMUL2_imm(u64 insn);
165 void HMUL2_32I(u64 insn);
166 void HSET2_reg(u64 insn);
167 void HSET2_cbuf(u64 insn);
168 void HSET2_imm(u64 insn);
169 void HSETP2_reg(u64 insn);
170 void HSETP2_cbuf(u64 insn);
171 void HSETP2_imm(u64 insn);
172 void I2F_reg(u64 insn);
173 void I2F_cbuf(u64 insn);
174 void I2F_imm(u64 insn);
175 void I2I_reg(u64 insn);
176 void I2I_cbuf(u64 insn);
177 void I2I_imm(u64 insn);
178 void IADD_reg(u64 insn);
179 void IADD_cbuf(u64 insn);
180 void IADD_imm(u64 insn);
181 void IADD3_reg(u64 insn);
182 void IADD3_cbuf(u64 insn);
183 void IADD3_imm(u64 insn);
184 void IADD32I(u64 insn);
185 void ICMP_reg(u64 insn);
186 void ICMP_rc(u64 insn);
187 void ICMP_cr(u64 insn);
188 void ICMP_imm(u64 insn);
189 void IDE(u64 insn);
190 void IDP_reg(u64 insn);
191 void IDP_imm(u64 insn);
192 void IMAD_reg(u64 insn);
193 void IMAD_rc(u64 insn);
194 void IMAD_cr(u64 insn);
195 void IMAD_imm(u64 insn);
196 void IMAD32I(u64 insn);
197 void IMADSP_reg(u64 insn);
198 void IMADSP_rc(u64 insn);
199 void IMADSP_cr(u64 insn);
200 void IMADSP_imm(u64 insn);
201 void IMNMX_reg(u64 insn);
202 void IMNMX_cbuf(u64 insn);
203 void IMNMX_imm(u64 insn);
204 void IMUL_reg(u64 insn);
205 void IMUL_cbuf(u64 insn);
206 void IMUL_imm(u64 insn);
207 void IMUL32I(u64 insn);
208 void IPA(u64 insn);
209 void ISBERD(u64 insn);
210 void ISCADD_reg(u64 insn);
211 void ISCADD_cbuf(u64 insn);
212 void ISCADD_imm(u64 insn);
213 void ISCADD32I(u64 insn);
214 void ISET_reg(u64 insn);
215 void ISET_cbuf(u64 insn);
216 void ISET_imm(u64 insn);
217 void ISETP_reg(u64 insn);
218 void ISETP_cbuf(u64 insn);
219 void ISETP_imm(u64 insn);
220 void JCAL(u64 insn);
221 void JMP(u64 insn);
222 void JMX(u64 insn);
223 void KIL();
224 void LD(u64 insn);
225 void LDC(u64 insn);
226 void LDG(u64 insn);
227 void LDL(u64 insn);
228 void LDS(u64 insn);
229 void LEA_hi_reg(u64 insn);
230 void LEA_hi_cbuf(u64 insn);
231 void LEA_lo_reg(u64 insn);
232 void LEA_lo_cbuf(u64 insn);
233 void LEA_lo_imm(u64 insn);
234 void LEPC(u64 insn);
235 void LONGJMP(u64 insn);
236 void LOP_reg(u64 insn);
237 void LOP_cbuf(u64 insn);
238 void LOP_imm(u64 insn);
239 void LOP3_reg(u64 insn);
240 void LOP3_cbuf(u64 insn);
241 void LOP3_imm(u64 insn);
242 void LOP32I(u64 insn);
243 void MEMBAR(u64 insn);
244 void MOV_reg(u64 insn);
245 void MOV_cbuf(u64 insn);
246 void MOV_imm(u64 insn);
247 void MOV32I(u64 insn);
248 void MUFU(u64 insn);
249 void NOP(u64 insn);
250 void OUT_reg(u64 insn);
251 void OUT_cbuf(u64 insn);
252 void OUT_imm(u64 insn);
253 void P2R_reg(u64 insn);
254 void P2R_cbuf(u64 insn);
255 void P2R_imm(u64 insn);
256 void PBK();
257 void PCNT();
258 void PEXIT(u64 insn);
259 void PIXLD(u64 insn);
260 void PLONGJMP(u64 insn);
261 void POPC_reg(u64 insn);
262 void POPC_cbuf(u64 insn);
263 void POPC_imm(u64 insn);
264 void PRET(u64 insn);
265 void PRMT_reg(u64 insn);
266 void PRMT_rc(u64 insn);
267 void PRMT_cr(u64 insn);
268 void PRMT_imm(u64 insn);
269 void PSET(u64 insn);
270 void PSETP(u64 insn);
271 void R2B(u64 insn);
272 void R2P_reg(u64 insn);
273 void R2P_cbuf(u64 insn);
274 void R2P_imm(u64 insn);
275 void RAM(u64 insn);
276 void RED(u64 insn);
277 void RET(u64 insn);
278 void RRO_reg(u64 insn);
279 void RRO_cbuf(u64 insn);
280 void RRO_imm(u64 insn);
281 void RTT(u64 insn);
282 void S2R(u64 insn);
283 void SAM(u64 insn);
284 void SEL_reg(u64 insn);
285 void SEL_cbuf(u64 insn);
286 void SEL_imm(u64 insn);
287 void SETCRSPTR(u64 insn);
288 void SETLMEMBASE(u64 insn);
289 void SHF_l_reg(u64 insn);
290 void SHF_l_imm(u64 insn);
291 void SHF_r_reg(u64 insn);
292 void SHF_r_imm(u64 insn);
293 void SHFL(u64 insn);
294 void SHL_reg(u64 insn);
295 void SHL_cbuf(u64 insn);
296 void SHL_imm(u64 insn);
297 void SHR_reg(u64 insn);
298 void SHR_cbuf(u64 insn);
299 void SHR_imm(u64 insn);
300 void SSY();
301 void ST(u64 insn);
302 void STG(u64 insn);
303 void STL(u64 insn);
304 void STP(u64 insn);
305 void STS(u64 insn);
306 void SUATOM(u64 insn);
307 void SUATOM_cas(u64 insn);
308 void SULD(u64 insn);
309 void SURED(u64 insn);
310 void SUST(u64 insn);
311 void SYNC(u64 insn);
312 void TEX(u64 insn);
313 void TEX_b(u64 insn);
314 void TEXS(u64 insn);
315 void TLD(u64 insn);
316 void TLD_b(u64 insn);
317 void TLD4(u64 insn);
318 void TLD4_b(u64 insn);
319 void TLD4S(u64 insn);
320 void TLDS(u64 insn);
321 void TMML(u64 insn);
322 void TMML_b(u64 insn);
323 void TXA(u64 insn);
324 void TXD(u64 insn);
325 void TXD_b(u64 insn);
326 void TXQ(u64 insn);
327 void TXQ_b(u64 insn);
328 void VABSDIFF(u64 insn);
329 void VABSDIFF4(u64 insn);
330 void VADD(u64 insn);
331 void VMAD(u64 insn);
332 void VMNMX(u64 insn);
333 void VOTE(u64 insn);
334 void VOTE_vtg(u64 insn);
335 void VSET(u64 insn);
336 void VSETP(u64 insn);
337 void VSHL(u64 insn);
338 void VSHR(u64 insn);
339 void XMAD_reg(u64 insn);
340 void XMAD_rc(u64 insn);
341 void XMAD_cr(u64 insn);
342 void XMAD_imm(u64 insn);
343
344 [[nodiscard]] IR::U32 X(IR::Reg reg);
345 [[nodiscard]] IR::U64 L(IR::Reg reg);
346 [[nodiscard]] IR::F32 F(IR::Reg reg);
347 [[nodiscard]] IR::F64 D(IR::Reg reg);
348
349 void X(IR::Reg dest_reg, const IR::U32& value);
350 void L(IR::Reg dest_reg, const IR::U64& value);
351 void F(IR::Reg dest_reg, const IR::F32& value);
352 void D(IR::Reg dest_reg, const IR::F64& value);
353
354 [[nodiscard]] IR::U32 GetReg8(u64 insn);
355 [[nodiscard]] IR::U32 GetReg20(u64 insn);
356 [[nodiscard]] IR::U32 GetReg39(u64 insn);
357 [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
358 [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
359 [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
360 [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
361 [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
362
363 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
364 [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
365 [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
366 [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
367
368 [[nodiscard]] IR::U32 GetImm20(u64 insn);
369 [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
370 [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
371 [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
372
373 [[nodiscard]] IR::U32 GetImm32(u64 insn);
374 [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
375
376 void SetZFlag(const IR::U1& value);
377 void SetSFlag(const IR::U1& value);
378 void SetCFlag(const IR::U1& value);
379 void SetOFlag(const IR::U1& value);
380
381 void ResetZero();
382 void ResetSFlag();
383 void ResetCFlag();
384 void ResetOFlag();
385};
386
387} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12 bool cc) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const iadd{insn};
18
19 if (sat) {
20 throw NotImplementedException("IADD SAT");
21 }
22 if (x && po) {
23 throw NotImplementedException("IADD X+PO");
24 }
25 // Operand A is always read from here, negated if needed
26 IR::U32 op_a{v.X(iadd.src_a)};
27 if (neg_a) {
28 op_a = v.ir.INeg(op_a);
29 }
30 // Add both operands
31 IR::U32 result{v.ir.IAdd(op_a, op_b)};
32 if (x) {
33 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34 result = v.ir.IAdd(result, carry);
35 }
36 if (po) {
37 // .PO adds one to the result
38 result = v.ir.IAdd(result, v.ir.Imm32(1));
39 }
40 if (cc) {
41 // Store flags
42 // TODO: Does this grab the result pre-PO or after?
43 if (po) {
44 throw NotImplementedException("IADD CC+PO");
45 }
46 // TODO: How does CC behave when X is set?
47 if (x) {
48 throw NotImplementedException("IADD X+CC");
49 }
50 v.SetZFlag(v.ir.GetZeroFromOp(result));
51 v.SetSFlag(v.ir.GetSignFromOp(result));
52 v.SetCFlag(v.ir.GetCarryFromOp(result));
53 v.SetOFlag(v.ir.GetOverflowFromOp(result));
54 }
55 // Store result
56 v.X(iadd.dest_reg, result);
57}
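// Summary of the paths above: the plain case computes dest = op_a + op_b with either operand
// optionally negated, X folds in the incoming carry flag, and PO adds a constant 1 instead of
// negating, so IADD.PO effectively evaluates op_a + op_b + 1.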
58
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60 union {
61 u64 insn;
62 BitField<43, 1, u64> x;
63 BitField<47, 1, u64> cc;
64 BitField<48, 2, u64> three_for_po;
65 BitField<48, 1, u64> neg_b;
66 BitField<49, 1, u64> neg_a;
67 BitField<50, 1, u64> sat;
68 } const iadd{insn};
69
70 const bool po{iadd.three_for_po == 3};
71 if (!po && iadd.neg_b != 0) {
72 op_b = v.ir.INeg(op_b);
73 }
74 IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
75}
76} // Anonymous namespace
77
78void TranslatorVisitor::IADD_reg(u64 insn) {
79 IADD(*this, insn, GetReg20(insn));
80}
81
82void TranslatorVisitor::IADD_cbuf(u64 insn) {
83 IADD(*this, insn, GetCbuf(insn));
84}
85
86void TranslatorVisitor::IADD_imm(u64 insn) {
87 IADD(*this, insn, GetImm20(insn));
88}
89
90void TranslatorVisitor::IADD32I(u64 insn) {
91 union {
92 u64 raw;
93 BitField<52, 1, u64> cc;
94 BitField<53, 1, u64> x;
95 BitField<54, 1, u64> sat;
96 BitField<55, 2, u64> three_for_po;
97 BitField<56, 1, u64> neg_a;
98 } const iadd32i{insn};
99
100 const bool po{iadd32i.three_for_po == 3};
101 const bool neg_a{!po && iadd32i.neg_a != 0};
102 IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
103}
104
105} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Shift : u64 {
12 None,
13 Right,
14 Left,
15};
16enum class Half : u64 {
17 All,
18 Lower,
19 Upper,
20};
21
22[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
23 constexpr bool is_signed{false};
24 switch (half) {
25 case Half::All:
26 return value;
27 case Half::Lower:
28 return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
29 case Half::Upper:
30 return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
31 }
32 throw NotImplementedException("Invalid half");
33}
34
35[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
36 switch (shift) {
37 case Shift::None:
38 return value;
39 case Shift::Right: {
40 // 33-bit RS IADD3 edge case
41 const IR::U1 edge_case{ir.GetCarryFromOp(value)};
42 const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
43 return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
44 }
45 case Shift::Left:
46 return ir.ShiftLeftLogical(value, ir.Imm32(16));
47 }
48 throw NotImplementedException("Invalid shift");
49}
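// The Right case above models a 33-bit shift: the carry out of op_a + op_b acts as bit 32, so
// when it is set the logically shifted sum gains an extra 0x10000 (bit 32 moved down by 16).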
50
51void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
52 Shift shift = Shift::None) {
53 union {
54 u64 insn;
55 BitField<0, 8, IR::Reg> dest_reg;
56 BitField<47, 1, u64> cc;
57 BitField<48, 1, u64> x;
58 BitField<49, 1, u64> neg_c;
59 BitField<50, 1, u64> neg_b;
60 BitField<51, 1, u64> neg_a;
61 } iadd3{insn};
62
63 if (iadd3.neg_a != 0) {
64 op_a = v.ir.INeg(op_a);
65 }
66 if (iadd3.neg_b != 0) {
67 op_b = v.ir.INeg(op_b);
68 }
69 if (iadd3.neg_c != 0) {
70 op_c = v.ir.INeg(op_c);
71 }
72 IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
73 if (iadd3.x != 0) {
74 // TODO: How does RS behave when X is set?
75 if (shift == Shift::Right) {
76 throw NotImplementedException("IADD3 X+RS");
77 }
78 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
79 lhs_1 = v.ir.IAdd(lhs_1, carry);
80 }
81 const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
82 const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
83
84 v.X(iadd3.dest_reg, result);
85 if (iadd3.cc != 0) {
86 // TODO: How does CC behave when X is set?
87 if (iadd3.x != 0) {
88 throw NotImplementedException("IADD3 X+CC");
89 }
90 v.SetZFlag(v.ir.GetZeroFromOp(result));
91 v.SetSFlag(v.ir.GetSignFromOp(result));
92 v.SetCFlag(v.ir.GetCarryFromOp(result));
93 const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
94 v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
95 }
96}
97} // Anonymous namespace
98
99void TranslatorVisitor::IADD3_reg(u64 insn) {
100 union {
101 u64 insn;
102 BitField<37, 2, Shift> shift;
103 BitField<35, 2, Half> half_a;
104 BitField<33, 2, Half> half_b;
105 BitField<31, 2, Half> half_c;
106 } const iadd3{insn};
107
108 const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
109 const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
110 const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
111 IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
112}
113
114void TranslatorVisitor::IADD3_cbuf(u64 insn) {
115 IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
116}
117
118void TranslatorVisitor::IADD3_imm(u64 insn) {
119 IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
120}
121
122} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<48, 1, u64> is_signed;
18 BitField<49, 3, CompareOp> compare_op;
19 } const icmp{insn};
20
21 const IR::U32 zero{v.ir.Imm32(0)};
22 const bool is_signed{icmp.is_signed != 0};
23 const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
24
25 const IR::U32 src_reg{v.X(icmp.src_reg)};
26 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
27
28 v.X(icmp.dest_reg, result);
29}
30} // Anonymous namespace
31
32void TranslatorVisitor::ICMP_reg(u64 insn) {
33 ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
34}
35
36void TranslatorVisitor::ICMP_rc(u64 insn) {
37 ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
38}
39
40void TranslatorVisitor::ICMP_cr(u64 insn) {
41 ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
42}
43
44void TranslatorVisitor::ICMP_imm(u64 insn) {
45 ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
46}
47
48} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
19 union {
20 u64 insn;
21 BitField<0, 8, IR::Reg> dest_reg;
22 BitField<8, 8, IR::Reg> src_reg;
23 BitField<39, 3, IR::Pred> pred;
24 BitField<42, 1, u64> neg_pred;
25 BitField<43, 1, u64> x;
26 BitField<44, 1, u64> bf;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<47, 1, u64> cc;
29 BitField<48, 1, u64> is_signed;
30 BitField<49, 3, CompareOp> compare_op;
31 } const iset{insn};
32
33 const IR::U32 src_a{v.X(iset.src_reg)};
34 const bool is_signed{iset.is_signed != 0};
35 const IR::U32 zero{v.ir.Imm32(0)};
36 const bool x{iset.x != 0};
37 const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
38
39 IR::U1 pred{v.ir.GetPred(iset.pred)};
40 if (iset.neg_pred != 0) {
41 pred = v.ir.LogicalNot(pred);
42 }
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
48 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
49
50 v.X(iset.dest_reg, result);
51 if (iset.cc != 0) {
52 if (x) {
53 throw NotImplementedException("ISET.CC + X");
54 }
55 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
56 v.SetZFlag(is_zero);
57 if (iset.bf != 0) {
58 v.ResetSFlag();
59 } else {
60 v.SetSFlag(v.ir.LogicalNot(is_zero));
61 }
62 v.ResetCFlag();
63 v.ResetOFlag();
64 }
65}
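// Result selection above: when the combined predicate passes, .BF stores float 1.0 (0x3f800000)
// and the default stores an all-ones mask (-1); a failing predicate always stores 0.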
66} // Anonymous namespace
67
68void TranslatorVisitor::ISET_reg(u64 insn) {
69 ISET(*this, insn, GetReg20(insn));
70}
71
72void TranslatorVisitor::ISET_cbuf(u64 insn) {
73 ISET(*this, insn, GetCbuf(insn));
74}
75
76void TranslatorVisitor::ISET_imm(u64 insn) {
77 ISET(*this, insn, GetImm20(insn));
78}
79
80} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class FloatFormat : u64 {
13 F16 = 1,
14 F32 = 2,
15 F64 = 3,
16};
17
18enum class IntFormat : u64 {
19 U8 = 0,
20 U16 = 1,
21 U32 = 2,
22 U64 = 3,
23};
24
25union Encoding {
26 u64 raw;
27 BitField<0, 8, IR::Reg> dest_reg;
28 BitField<8, 2, FloatFormat> float_format;
29 BitField<10, 2, IntFormat> int_format;
30 BitField<13, 1, u64> is_signed;
31 BitField<39, 2, FpRounding> fp_rounding;
32 BitField<41, 2, u64> selector;
33 BitField<47, 1, u64> cc;
34 BitField<45, 1, u64> neg;
35 BitField<49, 1, u64> abs;
36};
37
38bool Is64(u64 insn) {
39 return Encoding{insn}.int_format == IntFormat::U64;
40}
41
42int BitSize(FloatFormat format) {
43 switch (format) {
44 case FloatFormat::F16:
45 return 16;
46 case FloatFormat::F32:
47 return 32;
48 case FloatFormat::F64:
49 return 64;
50 }
51 throw NotImplementedException("Invalid float format {}", format);
52}
53
54IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
55 const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
56 const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
57 const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
58 const IR::U1 is_least{v.ir.IEqual(value, least_value)};
59 return IR::U32{v.ir.Select(is_least, value, absolute)};
60}
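// SmallAbs is the branchless absolute value (mask = value >> (bitsize - 1), then
// (value + mask) ^ mask), e.g. value = -5 gives mask = -1 and (-6) ^ -1 = 5. The most negative
// representable value is passed through unchanged, presumably matching hardware behaviour for
// sub-32-bit sources.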
61
62void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
63 const Encoding i2f{insn};
64 if (i2f.cc != 0) {
65 throw NotImplementedException("I2F CC");
66 }
67 const bool is_signed{i2f.is_signed != 0};
68 int src_bitsize{};
69 switch (i2f.int_format) {
70 case IntFormat::U8:
71 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
72 v.ir.Imm32(8), is_signed);
73 if (i2f.abs != 0) {
74 src = SmallAbs(v, src, 8);
75 }
76 src_bitsize = 8;
77 break;
78 case IntFormat::U16:
79 if (i2f.selector == 1 || i2f.selector == 3) {
80 throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
81 }
82 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
83 v.ir.Imm32(16), is_signed);
84 if (i2f.abs != 0) {
85 src = SmallAbs(v, src, 16);
86 }
87 src_bitsize = 16;
88 break;
89 case IntFormat::U32:
90 case IntFormat::U64:
91 if (i2f.selector != 0) {
92 throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
93 }
94 if (i2f.abs != 0 && is_signed) {
95 src = v.ir.IAbs(src);
96 }
97 src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
98 break;
99 }
100 const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
101 const int dst_bitsize{BitSize(i2f.float_format)};
102 const IR::FpControl fp_control{
103 .no_contraction = false,
104 .rounding = CastFpRounding(i2f.fp_rounding),
105 .fmz_mode = IR::FmzMode::DontCare,
106 };
107 auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
108 static_cast<size_t>(conversion_src_bitsize), is_signed, src,
109 fp_control)};
110 if (i2f.neg != 0) {
111 if (i2f.abs != 0 || !is_signed) {
112 // We know the value is positive
113 value = v.ir.FPNeg(value);
114 } else {
115 // Only negate if the input isn't the lowest value
116 IR::U1 is_least;
117 if (src_bitsize == 64) {
118 is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
119 } else if (src_bitsize == 32) {
120 is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
121 } else {
122 const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
123 is_least = v.ir.IEqual(src, least_value);
124 }
125 value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
126 }
127 }
128 switch (i2f.float_format) {
129 case FloatFormat::F16: {
130 const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
131 v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
132 break;
133 }
134 case FloatFormat::F32:
135 v.F(i2f.dest_reg, value);
136 break;
137 case FloatFormat::F64: {
138 if (!IR::IsAligned(i2f.dest_reg, 2)) {
139 throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
140 }
141 const IR::Value vector{v.ir.UnpackDouble2x32(value)};
142 for (int i = 0; i < 2; ++i) {
143 v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
144 }
145 break;
146 }
147 default:
148 throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
149 }
150}
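// Negation note for the path above: when the source equals the minimum signed value the result is
// left unnegated, presumably because the hardware negates the integer before converting and
// negating the minimum value is a no-op in two's complement, leaving the float negative.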
151} // Anonymous namespace
152
153void TranslatorVisitor::I2F_reg(u64 insn) {
154 if (Is64(insn)) {
155 union {
156 u64 raw;
157 BitField<20, 8, IR::Reg> reg;
158 } const value{insn};
159 const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
160 I2F(*this, insn, ir.PackUint2x32(regs));
161 } else {
162 I2F(*this, insn, GetReg20(insn));
163 }
164}
165
166void TranslatorVisitor::I2F_cbuf(u64 insn) {
167 if (Is64(insn)) {
168 I2F(*this, insn, GetPackedCbuf(insn));
169 } else {
170 I2F(*this, insn, GetCbuf(insn));
171 }
172}
173
174void TranslatorVisitor::I2F_imm(u64 insn) {
175 if (Is64(insn)) {
176 I2F(*this, insn, GetPackedImm20(insn));
177 } else {
178 I2F(*this, insn, GetImm20(insn));
179 }
180}
181
182} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class MaxShift : u64 {
12 U32,
13 Undefined,
14 U64,
15 S64,
16};
17
18IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
19 bool right_shift, bool is_signed) {
20 if (!right_shift) {
21 return ir.ShiftLeftLogical(packed_int, safe_shift);
22 }
23 if (is_signed) {
24 return ir.ShiftRightArithmetic(packed_int, safe_shift);
25 }
26 return ir.ShiftRightLogical(packed_int, safe_shift);
27}
28
29void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
30 bool right_shift) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<0, 8, IR::Reg> lo_bits_reg;
35 BitField<37, 2, MaxShift> max_shift;
36 BitField<47, 1, u64> cc;
37 BitField<48, 2, u64> x_mode;
38 BitField<50, 1, u64> wrap;
39 } const shf{insn};
40
41 if (shf.cc != 0) {
42 throw NotImplementedException("SHF CC");
43 }
44 if (shf.x_mode != 0) {
45 throw NotImplementedException("SHF X Mode");
46 }
47 if (shf.max_shift == MaxShift::Undefined) {
48 throw NotImplementedException("SHF Use of undefined MaxShift value");
49 }
50 const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
51 const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
52 const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
53 const IR::U32 safe_shift{shf.wrap != 0
54 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
55 : v.ir.UMin(shift, max_shift)};
56
57 const bool is_signed{shf.max_shift == MaxShift::S64};
58 const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
59 const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
60
61 const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
62 v.X(shf.dest_reg, result);
63}
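// Funnel-shift summary: the two 32-bit operands are packed as a 64-bit value (low_bits in the low
// half, high_bits in the high half), shifted by the wrapped or clamped amount, and the
// destination receives the low half for right shifts or the high half for left shifts.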
64} // Anonymous namespace
65
66void TranslatorVisitor::SHF_l_reg(u64 insn) {
67 SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
68}
69
70void TranslatorVisitor::SHF_l_imm(u64 insn) {
71 SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
72}
73
74void TranslatorVisitor::SHF_r_reg(u64 insn) {
75 SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
76}
77
78void TranslatorVisitor::SHF_r_imm(u64 insn) {
79 SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 2, u64> mode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const imnmx{insn};
22
23 if (imnmx.cc != 0) {
24 throw NotImplementedException("IMNMX CC");
25 }
26
27 if (imnmx.mode != 0) {
28 throw NotImplementedException("IMNMX.MODE");
29 }
30
31 const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
32 const IR::U32 op_a{v.X(imnmx.src_reg)};
33 IR::U32 min;
34 IR::U32 max;
35
36 if (imnmx.is_signed != 0) {
37 min = IR::U32{v.ir.SMin(op_a, op_b)};
38 max = IR::U32{v.ir.SMax(op_a, op_b)};
39 } else {
40 min = IR::U32{v.ir.UMin(op_a, op_b)};
41 max = IR::U32{v.ir.UMax(op_a, op_b)};
42 }
43 if (imnmx.neg_pred != 0) {
44 std::swap(min, max);
45 }
46
47 const IR::U32 result{v.ir.Select(pred, min, max)};
48 v.X(imnmx.dest_reg, result);
49}
50} // Anonymous namespace
51
52void TranslatorVisitor::IMNMX_reg(u64 insn) {
53 IMNMX(*this, insn, GetReg20(insn));
54}
55
56void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
57 IMNMX(*this, insn, GetCbuf(insn));
58}
59
60void TranslatorVisitor::IMNMX_imm(u64 insn) {
61 IMNMX(*this, insn, GetImm20(insn));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 } const popc{insn};
17
18 const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
19 const IR::U32 result = v.ir.BitCount(operand);
20 v.X(popc.dest_reg, result);
21}
22} // Anonymous namespace
23
24void TranslatorVisitor::POPC_reg(u64 insn) {
25 POPC(*this, insn, GetReg20(insn));
26}
27
28void TranslatorVisitor::POPC_cbuf(u64 insn) {
29 POPC(*this, insn, GetCbuf(insn));
30}
31
32void TranslatorVisitor::POPC_imm(u64 insn) {
33 POPC(*this, insn, GetImm20(insn));
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
12 u64 scale_imm) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> op_a;
17 } const iscadd{insn};
18
19 const bool po{neg_a && neg_b};
20 IR::U32 op_a{v.X(iscadd.op_a)};
21 if (po) {
22 // When PO is present, add one
23 op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
24 } else {
25 // When PO is not present, the bits are interpreted as negation
26 if (neg_a) {
27 op_a = v.ir.INeg(op_a);
28 }
29 if (neg_b) {
30 op_b = v.ir.INeg(op_b);
31 }
32 }
33 // With the operands already processed, scale A
34 const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
35 const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
36
37 const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
38 v.X(iscadd.dest_reg, result);
39
40 if (cc) {
41 v.SetZFlag(v.ir.GetZeroFromOp(result));
42 v.SetSFlag(v.ir.GetSignFromOp(result));
43 const IR::U1 carry{v.ir.GetCarryFromOp(result)};
44 const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
45 v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
46 v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
47 }
48}
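// In short, ISCADD computes dest = (op_a << scale) + op_b, where setting both negate bits is
// reinterpreted as .PO and adds 1 to op_b instead of negating either operand.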
49
50void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
51 union {
52 u64 raw;
53 BitField<47, 1, u64> cc;
54 BitField<48, 1, u64> neg_b;
55 BitField<49, 1, u64> neg_a;
56 BitField<39, 5, u64> scale;
57 } const iscadd{insn};
58
59 ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
60}
61
62} // Anonymous namespace
63
64void TranslatorVisitor::ISCADD_reg(u64 insn) {
65 ISCADD(*this, insn, GetReg20(insn));
66}
67
68void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
69 ISCADD(*this, insn, GetCbuf(insn));
70}
71
72void TranslatorVisitor::ISCADD_imm(u64 insn) {
73 ISCADD(*this, insn, GetImm20(insn));
74}
75
76void TranslatorVisitor::ISCADD32I(u64 insn) {
77 union {
78 u64 raw;
79 BitField<52, 1, u64> cc;
80 BitField<53, 5, u64> scale;
81 } const iscadd{insn};
82
83 return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
84}
85
86} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
19 union {
20 u64 raw;
21 BitField<0, 3, IR::Pred> dest_pred_b;
22 BitField<3, 3, IR::Pred> dest_pred_a;
23 BitField<8, 8, IR::Reg> src_reg_a;
24 BitField<39, 3, IR::Pred> bop_pred;
25 BitField<42, 1, u64> neg_bop_pred;
26 BitField<43, 1, u64> x;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<48, 1, u64> is_signed;
29 BitField<49, 3, CompareOp> compare_op;
30 } const isetp{insn};
31
32 const bool is_signed{isetp.is_signed != 0};
33 const bool x{isetp.x != 0};
34 const BooleanOp bop{isetp.bop};
35 const CompareOp compare_op{isetp.compare_op};
36 const IR::U32 op_a{v.X(isetp.src_reg_a)};
37 const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
38 const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
39 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
40 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
41 v.ir.SetPred(isetp.dest_pred_a, result_a);
42 v.ir.SetPred(isetp.dest_pred_b, result_b);
43}
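// ISETP writes two predicates: dest_pred_a combines the comparison with the boolean-op predicate,
// and dest_pred_b combines the negated comparison with the same predicate, giving P and "not P"
// variants from a single instruction.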
44} // Anonymous namespace
45
46void TranslatorVisitor::ISETP_reg(u64 insn) {
47 ISETP(*this, insn, GetReg20(insn));
48}
49
50void TranslatorVisitor::ISETP_cbuf(u64 insn) {
51 ISETP(*this, insn, GetCbuf(insn));
52}
53
54void TranslatorVisitor::ISETP_imm(u64 insn) {
55 ISETP(*this, insn, GetImm20(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> w;
17 BitField<43, 1, u64> x;
18 BitField<47, 1, u64> cc;
19 } const shl{insn};
20
21 if (shl.x != 0) {
22 throw NotImplementedException("SHL.X");
23 }
24 if (shl.cc != 0) {
25 throw NotImplementedException("SHL.CC");
26 }
27 const IR::U32 base{v.X(shl.src_reg_a)};
28 IR::U32 result;
29 if (shl.w != 0) {
30 // When .W is set, the shift value is wrapped
31 // To emulate this we just have to wrap it ourselves.
32 const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
33 result = v.ir.ShiftLeftLogical(base, shift);
34 } else {
35 // When .W is not set, the shift value is clamped between 0 and 32.
36        // To emulate this we have to keep in mind the special case of a shift by 32, which evaluates to 0.
37 // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
38 //
39 // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
40 // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
41 // or equal to the bit width of the components of Base."
42 //
43 // And on the GLASM specification it is also safe to evaluate out of bounds:
44 //
45 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
46 // "The results of a shift operation ("<<") are undefined if the value of the second operand
47 // is negative, or greater than or equal to the number of bits in the first operand."
48 //
49 // Emphasis on undefined results in contrast to undefined behavior.
50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
54 }
55 v.X(shl.dest_reg, result);
56}
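// Example of the two paths above: a shift amount of 40 without .W produces 0 (treated as an
// out-of-range shift), while with .W the amount wraps to 40 & 31 = 8 and the base is shifted by 8.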
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64 insn) {
60 SHL(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::SHL_cbuf(u64 insn) {
64 SHL(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> is_wrapped;
17 BitField<40, 1, u64> brev;
18 BitField<43, 1, u64> xmode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const shr{insn};
22
23 if (shr.xmode != 0) {
24 throw NotImplementedException("SHR.XMODE");
25 }
26 if (shr.cc != 0) {
27 throw NotImplementedException("SHR.CC");
28 }
29
30 IR::U32 base{v.X(shr.src_reg_a)};
31 if (shr.brev == 1) {
32 base = v.ir.BitReverse(base);
33 }
34 IR::U32 result;
35 const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
36 if (shr.is_signed == 1) {
37 result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
38 } else {
39 result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
40 }
41
42 if (shr.is_wrapped == 0) {
43 const IR::U32 zero{v.ir.Imm32(0)};
44 const IR::U32 safe_bits{v.ir.Imm32(32)};
45
46 const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
47 const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
48 const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
49 result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
50 }
51 v.X(shr.dest_reg, result);
52}
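// The clamp above reproduces the >= 32 case without .W: unsigned shifts collapse to 0, and
// arithmetic shifts collapse to 0 or 0xFFFFFFFF depending on the sign bit of the shifted value,
// matching what a full 32-position shift would produce.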
53} // Anonymous namespace
54
55void TranslatorVisitor::SHR_reg(u64 insn) {
56 SHR(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::SHR_cbuf(u64 insn) {
60 SHR(*this, insn, GetCbuf(insn));
61}
62
63void TranslatorVisitor::SHR_imm(u64 insn) {
64 SHR(*this, insn, GetImm20(insn));
65}
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SelectMode : u64 {
12 Default,
13 CLO,
14 CHI,
15 CSFU,
16 CBCC,
17};
18
19enum class Half : u64 {
20 H0, // Least-significant bits (15:0)
21 H1, // Most-significant bits (31:16)
22};
23
24IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
25 const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
26 return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
27}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49 // .PSL shifts the product 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61 return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b [15:0] into result's [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
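// In the default mode the code above computes dest = H(a) * H(b) + c on 16-bit halves widened to
// 32 bits; .PSL shifts the product left by 16 first, and .MRG merges src_b's low half into the
// upper half of the final result.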
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64 insn) {
82 union {
83 u64 raw;
84 BitField<35, 1, Half> half_b;
85 BitField<36, 1, u64> psl;
86 BitField<37, 1, u64> mrg;
87 BitField<38, 1, u64> x;
88 BitField<50, 3, SelectMode> select_mode;
89 } const xmad{insn};
90
91 XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
92 xmad.mrg != 0, xmad.x != 0);
93}
94
95void TranslatorVisitor::XMAD_rc(u64 insn) {
96 union {
97 u64 raw;
98 BitField<50, 2, SelectMode> select_mode;
99 BitField<52, 1, Half> half_b;
100 BitField<54, 1, u64> x;
101 } const xmad{insn};
102
103 XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
104 xmad.x != 0);
105}
106
107void TranslatorVisitor::XMAD_cr(u64 insn) {
108 union {
109 u64 raw;
110 BitField<50, 2, SelectMode> select_mode;
111 BitField<52, 1, Half> half_b;
112 BitField<54, 1, u64> x;
113 BitField<55, 1, u64> psl;
114 BitField<56, 1, u64> mrg;
115 } const xmad{insn};
116
117 XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
118 xmad.mrg != 0, xmad.x != 0);
119}
120
121void TranslatorVisitor::XMAD_imm(u64 insn) {
122 union {
123 u64 raw;
124 BitField<20, 16, u64> src_b;
125 BitField<36, 1, u64> psl;
126 BitField<37, 1, u64> mrg;
127 BitField<38, 1, u64> x;
128 BitField<50, 3, SelectMode> select_mode;
129 } const xmad{insn};
130
131 XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
132 Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
133}
134
135} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class IntegerWidth : u64 {
12 Byte,
13 Short,
14 Word,
15};
16
17[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
18 switch (width) {
19 case IntegerWidth::Byte:
20 return ir.Imm32(8);
21 case IntegerWidth::Short:
22 return ir.Imm32(16);
23 case IntegerWidth::Word:
24 return ir.Imm32(32);
25 default:
26 throw NotImplementedException("Invalid width {}", width);
27 }
28}
29
30[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
31 IntegerWidth dst_width) {
32 const IR::U32 zero{ir.Imm32(0)};
33 const IR::U32 count{WidthSize(ir, dst_width)};
34 return ir.BitFieldExtract(src, zero, count, false);
35}
36
37[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
38 bool dst_signed, bool src_signed) {
39 IR::U32 min{};
40 IR::U32 max{};
41 const IR::U32 zero{ir.Imm32(0)};
42 switch (dst_width) {
43 case IntegerWidth::Byte:
44 min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
45 max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
46 break;
47 case IntegerWidth::Short:
48 min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
49 max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
50 break;
51 case IntegerWidth::Word:
52 min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
53 max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
54 break;
55 default:
56 throw NotImplementedException("Invalid width {}", dst_width);
57 }
58 const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
59 return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
60}
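// Example (derived from the two helpers above): saturating 0x0000'0123 to a signed byte clamps it
// to 0x7f, whereas the unsaturated ConvertInteger path simply truncates it to 0x23.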
61
62void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
63 union {
64 u64 insn;
65 BitField<0, 8, IR::Reg> dest_reg;
66 BitField<8, 2, IntegerWidth> dst_fmt;
67 BitField<12, 1, u64> dst_fmt_sign;
68 BitField<10, 2, IntegerWidth> src_fmt;
69 BitField<13, 1, u64> src_fmt_sign;
70 BitField<41, 3, u64> selector;
71 BitField<45, 1, u64> neg;
72 BitField<47, 1, u64> cc;
73 BitField<49, 1, u64> abs;
74 BitField<50, 1, u64> sat;
75 } const i2i{insn};
76
77 if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
78 throw NotImplementedException("16-bit source format incompatible with selector {}",
79 i2i.selector);
80 }
81 if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
82 throw NotImplementedException("32-bit source format incompatible with selector {}",
83 i2i.selector);
84 }
85
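    // The selector picks which byte (8-bit sources) or halfword (16-bit sources) of the operand is
    // converted; it is scaled by 8 to obtain a bit offset for the extraction below.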
86 const s32 selector{static_cast<s32>(i2i.selector)};
87 const IR::U32 offset{v.ir.Imm32(selector * 8)};
88 const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
89 const bool src_signed{i2i.src_fmt_sign != 0};
90 const bool dst_signed{i2i.dst_fmt_sign != 0};
91 const bool sat{i2i.sat != 0};
92
93 IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
94 if (i2i.abs != 0) {
95 src_values = v.ir.IAbs(src_values);
96 }
97 if (i2i.neg != 0) {
98 src_values = v.ir.INeg(src_values);
99 }
100 const IR::U32 result{
101 sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
102 : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
103
104 v.X(i2i.dest_reg, result);
105 if (i2i.cc != 0) {
106 v.SetZFlag(v.ir.GetZeroFromOp(result));
107 v.SetSFlag(v.ir.GetSignFromOp(result));
108 v.ResetCFlag();
109 v.ResetOFlag();
110 }
111}
112} // Anonymous namespace
113
114void TranslatorVisitor::I2I_reg(u64 insn) {
115 I2I(*this, insn, GetReg20(insn));
116}
117
118void TranslatorVisitor::I2I_cbuf(u64 insn) {
119 I2I(*this, insn, GetCbuf(insn));
120}
121
122void TranslatorVisitor::I2I_imm(u64 insn) {
123 I2I(*this, insn, GetImm20(insn));
124}
125
126} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 Patch,
14 Prim,
15 Attr,
16};
17
18enum class Shift : u64 {
19 Default,
20 U16,
21 B32,
22};
23
24} // Anonymous namespace
25
26void TranslatorVisitor::ISBERD(u64 insn) {
27 union {
28 u64 raw;
29 BitField<0, 8, IR::Reg> dest_reg;
30 BitField<8, 8, IR::Reg> src_reg;
31 BitField<31, 1, u64> skew;
32 BitField<32, 1, u64> o;
33 BitField<33, 2, Mode> mode;
34 BitField<47, 2, Shift> shift;
35 } const isberd{insn};
36
37 if (isberd.skew != 0) {
38 throw NotImplementedException("SKEW");
39 }
40 if (isberd.o != 0) {
41 throw NotImplementedException("O");
42 }
43 if (isberd.mode != Mode::Default) {
44 throw NotImplementedException("Mode {}", isberd.mode.Value());
45 }
46 if (isberd.shift != Shift::Default) {
47 throw NotImplementedException("Shift {}", isberd.shift.Value());
48 }
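    // With every modifier forced to its default above, the stub simply copies the source register.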
49 LOG_WARNING(Shader, "(STUBBED) called");
50 X(isberd.dest_reg, X(isberd.src_reg));
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
9
10namespace Shader::Maxwell {
11using namespace LDC;
12namespace {
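// Default mode addresses cbuf[imm_index][reg + imm]; the indirect IL/IS/ISL modes are left
// unimplemented below.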
13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
14 const IR::U32& reg, const IR::U32& imm) {
15 switch (mode) {
16 case Mode::Default:
17 return {imm_index, ir.IAdd(reg, imm)};
18 default:
19 break;
20 }
21 throw NotImplementedException("Mode {}", mode);
22}
23} // Anonymous namespace
24
25void TranslatorVisitor::LDC(u64 insn) {
26 const Encoding ldc{insn};
27 const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
28 const IR::U32 reg{X(ldc.src_reg)};
29 const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
30 const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
31 switch (ldc.size) {
32 case Size::U8:
33 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
34 break;
35 case Size::S8:
36 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
37 break;
38 case Size::U16:
39 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
40 break;
41 case Size::S16:
42 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
43 break;
44 case Size::B32:
45 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
46 break;
47 case Size::B64: {
48 if (!IR::IsAligned(ldc.dest_reg, 2)) {
49 throw NotImplementedException("Unaligned destination register");
50 }
51 const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
52 for (int i = 0; i < 2; ++i) {
53 X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
54 }
55 break;
56 }
57 default:
58 throw NotImplementedException("Invalid size {}", ldc.size.Value());
59 }
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/reg.h"
10
11namespace Shader::Maxwell::LDC {
12
13enum class Mode : u64 {
14 Default,
15 IL,
16 IS,
17 ISL,
18};
19
20enum class Size : u64 {
21 U8,
22 S8,
23 U16,
24 S16,
25 B32,
26 B64,
27};
28
29union Encoding {
30 u64 raw;
31 BitField<0, 8, IR::Reg> dest_reg;
32 BitField<8, 8, IR::Reg> src_reg;
33 BitField<20, 16, s64> offset;
34 BitField<36, 5, u64> index;
35 BitField<44, 2, Mode> mode;
36 BitField<48, 3, Size> size;
37};
38
39} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
12 bool neg, bool x) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> offset_lo_reg;
17 BitField<47, 1, u64> cc;
18 BitField<48, 3, IR::Pred> pred;
19 } const lea{insn};
20
21 if (x) {
22 throw NotImplementedException("LEA.HI X");
23 }
24 if (lea.pred != IR::Pred::PT) {
25 throw NotImplementedException("LEA.HI Pred");
26 }
27 if (lea.cc != 0) {
28 throw NotImplementedException("LEA.HI CC");
29 }
30
31 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
32 const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
33 const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
34
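    // Shifting the packed 64-bit offset right by (32 - scale) and keeping its low word is the same
    // as taking the high 32 bits of (offset << scale); LEA.HI adds that word to the base.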
35 const s32 hi_scale{32 - static_cast<s32>(scale)};
36 const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
37 const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
38
39 IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
40 v.X(lea.dest_reg, result);
41}
42
43void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
44 union {
45 u64 insn;
46 BitField<0, 8, IR::Reg> dest_reg;
47 BitField<8, 8, IR::Reg> offset_lo_reg;
48 BitField<39, 5, u64> scale;
49 BitField<45, 1, u64> neg;
50 BitField<46, 1, u64> x;
51 BitField<47, 1, u64> cc;
52 BitField<48, 3, IR::Pred> pred;
53 } const lea{insn};
54 if (lea.x != 0) {
55 throw NotImplementedException("LEA.LO X");
56 }
57 if (lea.pred != IR::Pred::PT) {
58 throw NotImplementedException("LEA.LO Pred");
59 }
60 if (lea.cc != 0) {
61 throw NotImplementedException("LEA.LO CC");
62 }
63
64 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
65 const s32 scale{static_cast<s32>(lea.scale)};
66 const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
67 const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
68
69 IR::U32 result{v.ir.IAdd(base, scaled_offset)};
70 v.X(lea.dest_reg, result);
71}
72} // Anonymous namespace
73
74void TranslatorVisitor::LEA_hi_reg(u64 insn) {
75 union {
76 u64 insn;
77 BitField<28, 5, u64> scale;
78 BitField<37, 1, u64> neg;
79 BitField<38, 1, u64> x;
80 } const lea{insn};
81
82 LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
83}
84
85void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
86 union {
87 u64 insn;
88 BitField<51, 5, u64> scale;
89 BitField<56, 1, u64> neg;
90 BitField<57, 1, u64> x;
91 } const lea{insn};
92
93 LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
94}
95
96void TranslatorVisitor::LEA_lo_reg(u64 insn) {
97 LEA_lo(*this, insn, GetReg20(insn));
98}
99
100void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
101 LEA_lo(*this, insn, GetCbuf(insn));
102}
103
104void TranslatorVisitor::LEA_lo_imm(u64 insn) {
105 LEA_lo(*this, insn, GetImm20(insn));
106}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/ir_emitter.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Size : u64 {
15 B32,
16 B64,
17 B96,
18 B128,
19};
20
21enum class InterpolationMode : u64 {
22 Pass,
23 Multiply,
24 Constant,
25 Sc,
26};
27
28enum class SampleMode : u64 {
29 Default,
30 Centroid,
31 Offset,
32};
33
34u32 NumElements(Size size) {
35 switch (size) {
36 case Size::B32:
37 return 1;
38 case Size::B64:
39 return 2;
40 case Size::B96:
41 return 3;
42 case Size::B128:
43 return 4;
44 }
45 throw InvalidArgument("Invalid size {}", size);
46}
47
48template <typename F>
49void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
50 const IR::U32 index_value{v.X(index_reg)};
51 for (u32 element = 0; element < num_elements; ++element) {
52 const IR::U32 final_offset{
53 element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
54 f(element, final_offset);
55 }
56}
57
58} // Anonymous namespace
59
60void TranslatorVisitor::ALD(u64 insn) {
61 union {
62 u64 raw;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> index_reg;
65 BitField<20, 10, u64> absolute_offset;
66 BitField<20, 11, s64> relative_offset;
67 BitField<39, 8, IR::Reg> vertex_reg;
68 BitField<32, 1, u64> o;
69 BitField<31, 1, u64> patch;
70 BitField<47, 2, Size> size;
71 } const ald{insn};
72
73 const u64 offset{ald.absolute_offset.Value()};
74 if (offset % 4 != 0) {
75 throw NotImplementedException("Unaligned absolute offset {}", offset);
76 }
77 const IR::U32 vertex{X(ald.vertex_reg)};
78 const u32 num_elements{NumElements(ald.size)};
79 if (ald.index_reg == IR::Reg::RZ) {
80 for (u32 element = 0; element < num_elements; ++element) {
81 if (ald.patch != 0) {
82 const IR::Patch patch{offset / 4 + element};
83 F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
84 } else {
85 const IR::Attribute attr{offset / 4 + element};
86 F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
87 }
88 }
89 return;
90 }
91 if (ald.patch != 0) {
92 throw NotImplementedException("Indirect patch read");
93 }
94 HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
95 F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
96 });
97}
98
99void TranslatorVisitor::AST(u64 insn) {
100 union {
101 u64 raw;
102 BitField<0, 8, IR::Reg> src_reg;
103 BitField<8, 8, IR::Reg> index_reg;
104 BitField<20, 10, u64> absolute_offset;
105 BitField<20, 11, s64> relative_offset;
106 BitField<31, 1, u64> patch;
107 BitField<39, 8, IR::Reg> vertex_reg;
108 BitField<47, 2, Size> size;
109 } const ast{insn};
110
111 if (ast.index_reg != IR::Reg::RZ) {
112 throw NotImplementedException("Indexed store");
113 }
114 const u64 offset{ast.absolute_offset.Value()};
115 if (offset % 4 != 0) {
116 throw NotImplementedException("Unaligned absolute offset {}", offset);
117 }
118 const IR::U32 vertex{X(ast.vertex_reg)};
119 const u32 num_elements{NumElements(ast.size)};
120 if (ast.index_reg == IR::Reg::RZ) {
121 for (u32 element = 0; element < num_elements; ++element) {
122 if (ast.patch != 0) {
123 const IR::Patch patch{offset / 4 + element};
124 ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
125 } else {
126 const IR::Attribute attr{offset / 4 + element};
127 ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
128 }
129 }
130 return;
131 }
132 if (ast.patch != 0) {
133 throw NotImplementedException("Indexed tessellation patch store");
134 }
135 HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
136 ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
137 });
138}
139
140void TranslatorVisitor::IPA(u64 insn) {
141 // IPA is the instruction used to read varyings from a fragment shader.
142 // gl_FragCoord is mapped to the gl_Position attribute.
143 // It yields unknown results when used outside of the fragment shader stage.
144 union {
145 u64 raw;
146 BitField<0, 8, IR::Reg> dest_reg;
147 BitField<8, 8, IR::Reg> index_reg;
148 BitField<20, 8, IR::Reg> multiplier;
149 BitField<30, 8, IR::Attribute> attribute;
150 BitField<38, 1, u64> idx;
151 BitField<51, 1, u64> sat;
152 BitField<52, 2, SampleMode> sample_mode;
153 BitField<54, 2, InterpolationMode> interpolation_mode;
154 } const ipa{insn};
155
156 // Indexed IPAs are used for indexed varyings.
157 // For example:
158 //
159 // in vec4 colors[4];
160 // uniform int idx;
161 // void main() {
162 // gl_FragColor = colors[idx];
163 // }
164 const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
165 const IR::Attribute attribute{ipa.attribute};
166 IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
167 : ir.GetAttribute(attribute)};
168 if (IR::IsGeneric(attribute)) {
169 const ProgramHeader& sph{env.SPH()};
170 const u32 attr_index{IR::GenericAttributeIndex(attribute)};
171 const u32 element{static_cast<u32>(attribute) % 4};
172 const std::array input_map{sph.ps.GenericInputMap(attr_index)};
173 const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
174 if (is_perspective) {
175 const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
176 value = ir.FPMul(value, position_w);
177 }
178 }
179 if (ipa.interpolation_mode == InterpolationMode::Multiply) {
180 value = ir.FPMul(value, F(ipa.multiplier));
181 }
182
183 // Saturated IPAs are generally generated out of clamped varyings.
184 // For example: clamp(some_varying, 0.0, 1.0)
185 const bool is_saturated{ipa.sat != 0};
186 if (is_saturated) {
187 if (attribute == IR::Attribute::FrontFace) {
188 throw NotImplementedException("IPA.SAT on FrontFace");
189 }
190 value = ir.FPSaturate(value);
191 }
192
193 F(ipa.dest_reg, value);
194}
195
196} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Size : u64 {
12 U8,
13 S8,
14 U16,
15 S16,
16 B32,
17 B64,
18 B128,
19};
20
21IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
22 union {
23 u64 raw;
24 BitField<8, 8, IR::Reg> offset_reg;
25 BitField<20, 24, u64> absolute_offset;
26 BitField<20, 24, s64> relative_offset;
27 } const encoding{insn};
28
29 if (encoding.offset_reg == IR::Reg::RZ) {
30 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
31 } else {
32 const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
33 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
34 }
35}
36
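// Local memory is addressed in 32-bit words; return both the word index (byte offset / 4) and the
// original byte offset, which is still needed for sub-word extraction.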
37std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
38 const IR::U32 offset{Offset(v, insn)};
39 if (offset.IsImmediate()) {
40 return {v.ir.Imm32(offset.U32() / 4), offset};
41 } else {
42 return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
43 }
44}
45
46std::pair<int, bool> GetSize(u64 insn) {
47 union {
48 u64 raw;
49 BitField<48, 3, Size> size;
50 } const encoding{insn};
51
52 switch (encoding.size) {
53 case Size::U8:
54 return {8, false};
55 case Size::S8:
56 return {8, true};
57 case Size::U16:
58 return {16, false};
59 case Size::S16:
60 return {16, true};
61 case Size::B32:
62 return {32, false};
63 case Size::B64:
64 return {64, false};
65 case Size::B128:
66 return {128, false};
67 default:
68 throw NotImplementedException("Invalid size {}", encoding.size.Value());
69 }
70}
71
72IR::Reg Reg(u64 insn) {
73 union {
74 u64 raw;
75 BitField<0, 8, IR::Reg> reg;
76 } const encoding{insn};
77
78 return encoding.reg;
79}
80
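// Bit position of the addressed byte (0, 8, 16 or 24) or halfword (0 or 16) within its 32-bit
// word, computed as (byte offset * 8) masked to the in-word part.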
81IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
82 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
83}
84
85IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
86 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
87}
88
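// Local memory reads beyond the declared size return zero rather than undefined data.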
89IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
90 const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
91 const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
92 return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
93}
94} // Anonymous namespace
95
96void TranslatorVisitor::LDL(u64 insn) {
97 const auto [word_offset, offset]{WordOffset(*this, insn)};
98 const IR::U32 word{LoadLocal(*this, word_offset, offset)};
99 const IR::Reg dest{Reg(insn)};
100 const auto [bit_size, is_signed]{GetSize(insn)};
101 switch (bit_size) {
102 case 8: {
103 const IR::U32 bit{ByteOffset(ir, offset)};
104 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
105 break;
106 }
107 case 16: {
108 const IR::U32 bit{ShortOffset(ir, offset)};
109 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
110 break;
111 }
112 case 32:
113 case 64:
114 case 128:
115 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
116 throw NotImplementedException("Unaligned destination register {}", dest);
117 }
118 X(dest, word);
119 for (int i = 1; i < bit_size / 32; ++i) {
120 const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
121 const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
122 X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
123 }
124 break;
125 }
126}
127
128void TranslatorVisitor::LDS(u64 insn) {
129 const IR::U32 offset{Offset(*this, insn)};
130 const IR::Reg dest{Reg(insn)};
131 const auto [bit_size, is_signed]{GetSize(insn)};
132 const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
133 switch (bit_size) {
134 case 8:
135 case 16:
136 case 32:
137 X(dest, IR::U32{value});
138 break;
139 case 64:
140 case 128:
141 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
142 throw NotImplementedException("Unaligned destination register {}", dest);
143 }
144 for (int element = 0; element < bit_size / 32; ++element) {
145 X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
146 }
147 break;
148 }
149}
150
151void TranslatorVisitor::STL(u64 insn) {
152 const auto [word_offset, offset]{WordOffset(*this, insn)};
153 if (offset.IsImmediate()) {
154 // TODO: Support storing out of bounds at runtime
155 if (offset.U32() >= env.LocalMemorySize()) {
156 LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
157 offset.U32(), env.LocalMemorySize());
158 return;
159 }
160 }
161 const IR::Reg reg{Reg(insn)};
162 const IR::U32 src{X(reg)};
163 const int bit_size{GetSize(insn).first};
164 switch (bit_size) {
165 case 8: {
166 const IR::U32 bit{ByteOffset(ir, offset)};
167 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
168 ir.WriteLocal(word_offset, value);
169 break;
170 }
171 case 16: {
172 const IR::U32 bit{ShortOffset(ir, offset)};
173 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
174 ir.WriteLocal(word_offset, value);
175 break;
176 }
177 case 32:
178 case 64:
179 case 128:
180 if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
181 throw NotImplementedException("Unaligned source register");
182 }
183 ir.WriteLocal(word_offset, src);
184 for (int i = 1; i < bit_size / 32; ++i) {
185 ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
186 }
187 break;
188 }
189}
190
191void TranslatorVisitor::STS(u64 insn) {
192 const IR::U32 offset{Offset(*this, insn)};
193 const IR::Reg reg{Reg(insn)};
194 const int bit_size{GetSize(insn).first};
195 switch (bit_size) {
196 case 8:
197 case 16:
198 case 32:
199 ir.WriteShared(bit_size, offset, X(reg));
200 break;
201 case 64:
202 if (!IR::IsAligned(reg, 2)) {
203 throw NotImplementedException("Unaligned source register {}", reg);
204 }
205 ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
206 break;
207 case 128: {
208 if (!IR::IsAligned(reg, 2)) {
209 throw NotImplementedException("Unaligned source register {}", reg);
210 }
211 const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
212 ir.WriteShared(128, offset, vector);
213 break;
214 }
215 }
216}
217
218} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class LoadSize : u64 {
14 U8, // Zero-extend
15 S8, // Sign-extend
16 U16, // Zero-extend
17 S16, // Sign-extend
18 B32,
19 B64,
20 B128,
21 U128, // ???
22};
23
24enum class StoreSize : u64 {
25 U8, // Zero-extend
26 S8, // Sign-extend
27 U16, // Zero-extend
28 S16, // Sign-extend
29 B32,
30 B64,
31 B128,
32};
33
34// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
35enum class LoadCache : u64 {
36 CA, // Cache at all levels, likely to be accessed again
37 CG, // Cache at global level (cache in L2 and below, not L1)
38 CI, // ???
39 CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
40};
41
42// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
43enum class StoreCache : u64 {
44 WB, // Cache write-back all coherent levels
45 CG, // Cache at global level
46 CS, // Cache streaming, likely to be accessed once
47 WT, // Cache write-through (to system memory)
48};
49
50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
51 union {
52 u64 raw;
53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
56 BitField<45, 1, u64> e;
57 } const mem{insn};
58
59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register");
66 }
67 // Pack two registers to build the 64-bit address
68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::LDG(u64 insn) {
84 // LDG loads global memory into registers
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<46, 2, LoadCache> cache;
89 BitField<48, 3, LoadSize> size;
90 } const ldg{insn};
91
92 // Pointer to load data from
93 const IR::U64 address{Address(*this, insn)};
94 const IR::Reg dest_reg{ldg.dest_reg};
95 switch (ldg.size) {
96 case LoadSize::U8:
97 X(dest_reg, ir.LoadGlobalU8(address));
98 break;
99 case LoadSize::S8:
100 X(dest_reg, ir.LoadGlobalS8(address));
101 break;
102 case LoadSize::U16:
103 X(dest_reg, ir.LoadGlobalU16(address));
104 break;
105 case LoadSize::S16:
106 X(dest_reg, ir.LoadGlobalS16(address));
107 break;
108 case LoadSize::B32:
109 X(dest_reg, ir.LoadGlobal32(address));
110 break;
111 case LoadSize::B64: {
112 if (!IR::IsAligned(dest_reg, 2)) {
113 throw NotImplementedException("Unaligned data registers");
114 }
115 const IR::Value vector{ir.LoadGlobal64(address)};
116 for (int i = 0; i < 2; ++i) {
117 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
118 }
119 break;
120 }
121 case LoadSize::B128:
122 case LoadSize::U128: {
123 if (!IR::IsAligned(dest_reg, 4)) {
124 throw NotImplementedException("Unaligned data registers");
125 }
126 const IR::Value vector{ir.LoadGlobal128(address)};
127 for (int i = 0; i < 4; ++i) {
128 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
129 }
130 break;
131 }
132 default:
133 throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
134 }
135}
136
137void TranslatorVisitor::STG(u64 insn) {
138 // STG stores registers into global memory.
139 union {
140 u64 raw;
141 BitField<0, 8, IR::Reg> data_reg;
142 BitField<46, 2, StoreCache> cache;
143 BitField<48, 3, StoreSize> size;
144 } const stg{insn};
145
146 // Pointer to store data into
147 const IR::U64 address{Address(*this, insn)};
148 const IR::Reg data_reg{stg.data_reg};
149 switch (stg.size) {
150 case StoreSize::U8:
151 ir.WriteGlobalU8(address, X(data_reg));
152 break;
153 case StoreSize::S8:
154 ir.WriteGlobalS8(address, X(data_reg));
155 break;
156 case StoreSize::U16:
157 ir.WriteGlobalU16(address, X(data_reg));
158 break;
159 case StoreSize::S16:
160 ir.WriteGlobalS16(address, X(data_reg));
161 break;
162 case StoreSize::B32:
163 ir.WriteGlobal32(address, X(data_reg));
164 break;
165 case StoreSize::B64: {
166 if (!IR::IsAligned(data_reg, 2)) {
167 throw NotImplementedException("Unaligned data registers");
168 }
169 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
170 ir.WriteGlobal64(address, vector);
171 break;
172 }
173 case StoreSize::B128:
174 if (!IR::IsAligned(data_reg, 4)) {
175 throw NotImplementedException("Unaligned data registers");
176 }
177 const IR::Value vector{
178 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
179 ir.WriteGlobal128(address, vector);
180 break;
181 }
182}
183
184} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class LogicalOp : u64 {
13 AND,
14 OR,
15 XOR,
16 PASS_B,
17};
18
19[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
20 const IR::U32& operand_2, LogicalOp op) {
21 switch (op) {
22 case LogicalOp::AND:
23 return ir.BitwiseAnd(operand_1, operand_2);
24 case LogicalOp::OR:
25 return ir.BitwiseOr(operand_1, operand_2);
26 case LogicalOp::XOR:
27 return ir.BitwiseXor(operand_1, operand_2);
28 case LogicalOp::PASS_B:
29 return operand_2;
30 default:
31 throw NotImplementedException("Invalid Logical operation {}", op);
32 }
33}
34
35void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
36 LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
37 IR::Pred dest_pred = IR::Pred::PT) {
38 union {
39 u64 insn;
40 BitField<0, 8, IR::Reg> dest_reg;
41 BitField<8, 8, IR::Reg> src_reg;
42 } const lop{insn};
43
44 if (x) {
45 throw NotImplementedException("X");
46 }
47 IR::U32 op_a{v.X(lop.src_reg)};
48 if (inv_a != 0) {
49 op_a = v.ir.BitwiseNot(op_a);
50 }
51 if (inv_b != 0) {
52 op_b = v.ir.BitwiseNot(op_b);
53 }
54
55 const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
56 if (pred_op) {
57 const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
58 v.ir.SetPred(dest_pred, pred_result);
59 }
60 if (cc) {
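            // PASS_B forwards operand B unchanged, so the result may not come from an op that
            // carries flags; compute Z and S with explicit comparisons instead.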
61 if (bit_op == LogicalOp::PASS_B) {
62 v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
63 v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
64 } else {
65 v.SetZFlag(v.ir.GetZeroFromOp(result));
66 v.SetSFlag(v.ir.GetSignFromOp(result));
67 }
68 v.ResetCFlag();
69 v.ResetOFlag();
70 }
71 v.X(lop.dest_reg, result);
72}
73
74void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
75 union {
76 u64 insn;
77 BitField<39, 1, u64> inv_a;
78 BitField<40, 1, u64> inv_b;
79 BitField<41, 2, LogicalOp> bit_op;
80 BitField<43, 1, u64> x;
81 BitField<44, 2, PredicateOp> pred_op;
82 BitField<47, 1, u64> cc;
83 BitField<48, 3, IR::Pred> dest_pred;
84 } const lop{insn};
85
86 LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
87 lop.pred_op, lop.dest_pred);
88}
89} // Anonymous namespace
90
91void TranslatorVisitor::LOP_reg(u64 insn) {
92 LOP(*this, insn, GetReg20(insn));
93}
94
95void TranslatorVisitor::LOP_cbuf(u64 insn) {
96 LOP(*this, insn, GetCbuf(insn));
97}
98
99void TranslatorVisitor::LOP_imm(u64 insn) {
100 LOP(*this, insn, GetImm20(insn));
101}
102
103void TranslatorVisitor::LOP32I(u64 insn) {
104 union {
105 u64 raw;
106 BitField<53, 2, LogicalOp> bit_op;
107 BitField<57, 1, u64> x;
108 BitField<52, 1, u64> cc;
109 BitField<55, 1, u64> inv_a;
110 BitField<56, 1, u64> inv_b;
111 } const lop32i{insn};
112
113 LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
114 lop32i.inv_b != 0, lop32i.bit_op);
115}
116} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
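// Bit i of the 8-bit truth table holds the result for inputs a = bit 2, b = bit 1, c = bit 0 of i;
// for example ttbl = 0xEA computes (a & b) | c and ttbl = 0x96 computes a ^ b ^ c.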
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)};
17 const IR::U32 not_a{ir.BitwiseNot(a)};
18 const IR::U32 not_b{ir.BitwiseNot(b)};
19 const IR::U32 not_c{ir.BitwiseNot(c)};
20 if (ttbl & 0x01) {
21 // r |= ~a & ~b & ~c;
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
24 r = ir.BitwiseOr(r, rhs);
25 }
26 if (ttbl & 0x02) {
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69}
70
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
72 union {
73 u64 insn;
74 BitField<0, 8, IR::Reg> dest_reg;
75 BitField<8, 8, IR::Reg> src_reg;
76 BitField<47, 1, u64> cc;
77 } const lop3{insn};
78
79 if (lop3.cc != 0) {
80 throw NotImplementedException("LOP3 CC");
81 }
82
83 const IR::U32 op_a{v.X(lop3.src_reg)};
84 const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
85 v.X(lop3.dest_reg, result);
86 return result;
87}
88
89u64 GetLut48(u64 insn) {
90 union {
91 u64 raw;
92 BitField<48, 8, u64> lut;
93 } const lut{insn};
94 return lut.lut;
95}
96} // Anonymous namespace
97
98void TranslatorVisitor::LOP3_reg(u64 insn) {
99 union {
100 u64 insn;
101 BitField<28, 8, u64> lut;
102 BitField<38, 1, u64> x;
103 BitField<36, 2, PredicateOp> pred_op;
104 BitField<48, 3, IR::Pred> pred;
105 } const lop3{insn};
106
107 if (lop3.x != 0) {
108 throw NotImplementedException("LOP3 X");
109 }
110 const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
111 const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
112 ir.SetPred(lop3.pred, pred_result);
113}
114
115void TranslatorVisitor::LOP3_cbuf(u64 insn) {
116 LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
117}
118
119void TranslatorVisitor::LOP3_imm(u64 insn) {
120 LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
121}
122} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15} // Anonymous namespace
16
17void TranslatorVisitor::P2R_reg(u64) {
18 throw NotImplementedException("P2R (reg)");
19}
20
21void TranslatorVisitor::P2R_cbuf(u64) {
22 throw NotImplementedException("P2R (cbuf)");
23}
24
25void TranslatorVisitor::P2R_imm(u64 insn) {
26 union {
27 u64 raw;
28 BitField<0, 8, IR::Reg> dest_reg;
29 BitField<8, 8, IR::Reg> src;
30 BitField<40, 1, Mode> mode;
31 BitField<41, 2, u64> byte_selector;
32 } const p2r{insn};
33
34 const u32 mask{GetImm20(insn).U32()};
35 const bool pr_mode{p2r.mode == Mode::PR};
36 const u32 num_items{pr_mode ? 7U : 4U};
37 const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
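    // Gather each masked predicate (P0-P6) or CC flag (Z, S, C, O) into bit (index + offset) of
    // 'insert', then merge it into the source register, preserving bits outside the shifted mask.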
38 IR::U32 insert{ir.Imm32(0)};
39 for (u32 index = 0; index < num_items; ++index) {
40 if (((mask >> index) & 1) == 0) {
41 continue;
42 }
43 const IR::U1 cond{[this, index, pr_mode] {
44 if (pr_mode) {
45 return ir.GetPred(IR::Pred{index});
46 }
47 switch (index) {
48 case 0:
49 return ir.GetZFlag();
50 case 1:
51 return ir.GetSFlag();
52 case 2:
53 return ir.GetCFlag();
54 case 3:
55 return ir.GetOFlag();
56 }
57 throw LogicError("Unreachable P2R index");
58 }()};
59 const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
60 insert = ir.BitwiseOr(insert, bit);
61 }
62 const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
63 X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<39, 4, u64> mask;
18 BitField<12, 4, u64> mov32i_mask;
19 } const mov{insn};
20
21 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
22 throw NotImplementedException("Non-full move mask");
23 }
24 v.X(mov.dest_reg, src);
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::MOV_reg(u64 insn) {
29 MOV(*this, insn, GetReg20(insn));
30}
31
32void TranslatorVisitor::MOV_cbuf(u64 insn) {
33 MOV(*this, insn, GetCbuf(insn));
34}
35
36void TranslatorVisitor::MOV_imm(u64 insn) {
37 MOV(*this, insn, GetImm20(insn));
38}
39
40void TranslatorVisitor::MOV32I(u64 insn) {
41 MOV(*this, insn, GetImm32(insn), true);
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15
16void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
17 switch (index) {
18 case 0:
19 return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
20 case 1:
21 return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
22 case 2:
23 return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
24 case 3:
25 return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
26 default:
27 throw LogicError("Unreachable R2P index");
28 }
29}
30
31void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
32 union {
33 u64 raw;
34 BitField<8, 8, IR::Reg> src_reg;
35 BitField<40, 1, Mode> mode;
36 BitField<41, 2, u64> byte_selector;
37 } const r2p{insn};
38 const IR::U32 src{v.X(r2p.src_reg)};
39 const IR::U32 count{v.ir.Imm32(1)};
40 const bool pr_mode{r2p.mode == Mode::PR};
41 const u32 num_items{pr_mode ? 7U : 4U};
42 const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
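    // Each selected source bit drives one predicate (P0-P6 in PR mode) or one CC flag (CC mode);
    // entries whose mask bit is clear keep their previous value.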
43 for (u32 index = 0; index < num_items; ++index) {
44 const IR::U32 offset{v.ir.Imm32(offset_base + index)};
45 const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
46 const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
47 const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
48 const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
49 if (pr_mode) {
50 const IR::Pred pred{index};
51 v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
52 } else {
53 SetFlag(v.ir, inv_mask_bit, src_bit, index);
54 }
55 }
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::R2P_reg(u64 insn) {
60 R2P(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::R2P_cbuf(u64 insn) {
64 R2P(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::R2P_imm(u64 insn) {
68 R2P(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SpecialRegister : u64 {
12 SR_LANEID = 0,
13 SR_CLOCK = 1,
14 SR_VIRTCFG = 2,
15 SR_VIRTID = 3,
16 SR_PM0 = 4,
17 SR_PM1 = 5,
18 SR_PM2 = 6,
19 SR_PM3 = 7,
20 SR_PM4 = 8,
21 SR_PM5 = 9,
22 SR_PM6 = 10,
23 SR_PM7 = 11,
24 SR12 = 12,
25 SR13 = 13,
26 SR14 = 14,
27 SR_ORDERING_TICKET = 15,
28 SR_PRIM_TYPE = 16,
29 SR_INVOCATION_ID = 17,
30 SR_Y_DIRECTION = 18,
31 SR_THREAD_KILL = 19,
32 SM_SHADER_TYPE = 20,
33 SR_DIRECTCBEWRITEADDRESSLOW = 21,
34 SR_DIRECTCBEWRITEADDRESSHIGH = 22,
35 SR_DIRECTCBEWRITEENABLE = 23,
36 SR_MACHINE_ID_0 = 24,
37 SR_MACHINE_ID_1 = 25,
38 SR_MACHINE_ID_2 = 26,
39 SR_MACHINE_ID_3 = 27,
40 SR_AFFINITY = 28,
41 SR_INVOCATION_INFO = 29,
42 SR_WSCALEFACTOR_XY = 30,
43 SR_WSCALEFACTOR_Z = 31,
44 SR_TID = 32,
45 SR_TID_X = 33,
46 SR_TID_Y = 34,
47 SR_TID_Z = 35,
48 SR_CTA_PARAM = 36,
49 SR_CTAID_X = 37,
50 SR_CTAID_Y = 38,
51 SR_CTAID_Z = 39,
52 SR_NTID = 40,
53 SR_CirQueueIncrMinusOne = 41,
54 SR_NLATC = 42,
55 SR43 = 43,
56 SR_SM_SPA_VERSION = 44,
57 SR_MULTIPASSSHADERINFO = 45,
58 SR_LWINHI = 46,
59 SR_SWINHI = 47,
60 SR_SWINLO = 48,
61 SR_SWINSZ = 49,
62 SR_SMEMSZ = 50,
63 SR_SMEMBANKS = 51,
64 SR_LWINLO = 52,
65 SR_LWINSZ = 53,
66 SR_LMEMLOSZ = 54,
67 SR_LMEMHIOFF = 55,
68 SR_EQMASK = 56,
69 SR_LTMASK = 57,
70 SR_LEMASK = 58,
71 SR_GTMASK = 59,
72 SR_GEMASK = 60,
73 SR_REGALLOC = 61,
74 SR_BARRIERALLOC = 62,
75 SR63 = 63,
76 SR_GLOBALERRORSTATUS = 64,
77 SR65 = 65,
78 SR_WARPERRORSTATUS = 66,
79 SR_WARPERRORSTATUSCLEAR = 67,
80 SR68 = 68,
81 SR69 = 69,
82 SR70 = 70,
83 SR71 = 71,
84 SR_PM_HI0 = 72,
85 SR_PM_HI1 = 73,
86 SR_PM_HI2 = 74,
87 SR_PM_HI3 = 75,
88 SR_PM_HI4 = 76,
89 SR_PM_HI5 = 77,
90 SR_PM_HI6 = 78,
91 SR_PM_HI7 = 79,
92 SR_CLOCKLO = 80,
93 SR_CLOCKHI = 81,
94 SR_GLOBALTIMERLO = 82,
95 SR_GLOBALTIMERHI = 83,
96 SR84 = 84,
97 SR85 = 85,
98 SR86 = 86,
99 SR87 = 87,
100 SR88 = 88,
101 SR89 = 89,
102 SR90 = 90,
103 SR91 = 91,
104 SR92 = 92,
105 SR93 = 93,
106 SR94 = 94,
107 SR95 = 95,
108 SR_HWTASKID = 96,
109 SR_CIRCULARQUEUEENTRYINDEX = 97,
110 SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
111 SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
112};
113
114[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
115 switch (special_register) {
116 case SpecialRegister::SR_INVOCATION_ID:
117 return ir.InvocationId();
118 case SpecialRegister::SR_THREAD_KILL:
119 return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
120 case SpecialRegister::SR_INVOCATION_INFO:
121 LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
122 return ir.Imm32(0x00ff'0000);
123 case SpecialRegister::SR_TID: {
124 const IR::Value tid{ir.LocalInvocationId()};
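        // Pack the local invocation ID with x in the low bits, y in bits [16, 24) and z in
        // bits [26, 32).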
125 return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
126 IR::U32{ir.CompositeExtract(tid, 1)},
127 ir.Imm32(16), ir.Imm32(8)),
128 IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
129 }
130 case SpecialRegister::SR_TID_X:
131 return ir.LocalInvocationIdX();
132 case SpecialRegister::SR_TID_Y:
133 return ir.LocalInvocationIdY();
134 case SpecialRegister::SR_TID_Z:
135 return ir.LocalInvocationIdZ();
136 case SpecialRegister::SR_CTAID_X:
137 return ir.WorkgroupIdX();
138 case SpecialRegister::SR_CTAID_Y:
139 return ir.WorkgroupIdY();
140 case SpecialRegister::SR_CTAID_Z:
141 return ir.WorkgroupIdZ();
142 case SpecialRegister::SR_WSCALEFACTOR_XY:
143 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
144 return ir.Imm32(Common::BitCast<u32>(1.0f));
145 case SpecialRegister::SR_WSCALEFACTOR_Z:
146 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
147 return ir.Imm32(Common::BitCast<u32>(1.0f));
148 case SpecialRegister::SR_LANEID:
149 return ir.LaneId();
150 case SpecialRegister::SR_EQMASK:
151 return ir.SubgroupEqMask();
152 case SpecialRegister::SR_LTMASK:
153 return ir.SubgroupLtMask();
154 case SpecialRegister::SR_LEMASK:
155 return ir.SubgroupLeMask();
156 case SpecialRegister::SR_GTMASK:
157 return ir.SubgroupGtMask();
158 case SpecialRegister::SR_GEMASK:
159 return ir.SubgroupGeMask();
160 case SpecialRegister::SR_Y_DIRECTION:
161 return ir.BitCast<IR::U32>(ir.YDirection());
162 case SpecialRegister::SR_AFFINITY:
163 LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
164 return ir.Imm32(0); // This is the default value hardware returns.
165 default:
166 throw NotImplementedException("S2R special register {}", special_register);
167 }
168}
169} // Anonymous namespace
170
171void TranslatorVisitor::S2R(u64 insn) {
172 union {
173 u64 raw;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<20, 8, SpecialRegister> src_reg;
176 } const s2r{insn};
177
178 X(s2r.dest_reg, Read(ir, s2r.src_reg));
179}
180
181} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
13 throw NotImplementedException("Instruction {} is not implemented", opcode);
14}
15
16void TranslatorVisitor::ATOM_cas(u64) {
17 ThrowNotImplemented(Opcode::ATOM_cas);
18}
19
20void TranslatorVisitor::ATOMS_cas(u64) {
21 ThrowNotImplemented(Opcode::ATOMS_cas);
22}
23
24void TranslatorVisitor::B2R(u64) {
25 ThrowNotImplemented(Opcode::B2R);
26}
27
28void TranslatorVisitor::BPT(u64) {
29 ThrowNotImplemented(Opcode::BPT);
30}
31
32void TranslatorVisitor::BRA(u64) {
33 ThrowNotImplemented(Opcode::BRA);
34}
35
36void TranslatorVisitor::BRK(u64) {
37 ThrowNotImplemented(Opcode::BRK);
38}
39
40void TranslatorVisitor::CAL() {
41 // CAL is a no-op
42}
43
44void TranslatorVisitor::CCTL(u64) {
45 ThrowNotImplemented(Opcode::CCTL);
46}
47
48void TranslatorVisitor::CCTLL(u64) {
49 ThrowNotImplemented(Opcode::CCTLL);
50}
51
52void TranslatorVisitor::CONT(u64) {
53 ThrowNotImplemented(Opcode::CONT);
54}
55
56void TranslatorVisitor::CS2R(u64) {
57 ThrowNotImplemented(Opcode::CS2R);
58}
59
60void TranslatorVisitor::FCHK_reg(u64) {
61 ThrowNotImplemented(Opcode::FCHK_reg);
62}
63
64void TranslatorVisitor::FCHK_cbuf(u64) {
65 ThrowNotImplemented(Opcode::FCHK_cbuf);
66}
67
68void TranslatorVisitor::FCHK_imm(u64) {
69 ThrowNotImplemented(Opcode::FCHK_imm);
70}
71
72void TranslatorVisitor::GETCRSPTR(u64) {
73 ThrowNotImplemented(Opcode::GETCRSPTR);
74}
75
76void TranslatorVisitor::GETLMEMBASE(u64) {
77 ThrowNotImplemented(Opcode::GETLMEMBASE);
78}
79
80void TranslatorVisitor::IDE(u64) {
81 ThrowNotImplemented(Opcode::IDE);
82}
83
84void TranslatorVisitor::IDP_reg(u64) {
85 ThrowNotImplemented(Opcode::IDP_reg);
86}
87
88void TranslatorVisitor::IDP_imm(u64) {
89 ThrowNotImplemented(Opcode::IDP_imm);
90}
91
92void TranslatorVisitor::IMAD_reg(u64) {
93 ThrowNotImplemented(Opcode::IMAD_reg);
94}
95
96void TranslatorVisitor::IMAD_rc(u64) {
97 ThrowNotImplemented(Opcode::IMAD_rc);
98}
99
100void TranslatorVisitor::IMAD_cr(u64) {
101 ThrowNotImplemented(Opcode::IMAD_cr);
102}
103
104void TranslatorVisitor::IMAD_imm(u64) {
105 ThrowNotImplemented(Opcode::IMAD_imm);
106}
107
108void TranslatorVisitor::IMAD32I(u64) {
109 ThrowNotImplemented(Opcode::IMAD32I);
110}
111
112void TranslatorVisitor::IMADSP_reg(u64) {
113 ThrowNotImplemented(Opcode::IMADSP_reg);
114}
115
116void TranslatorVisitor::IMADSP_rc(u64) {
117 ThrowNotImplemented(Opcode::IMADSP_rc);
118}
119
120void TranslatorVisitor::IMADSP_cr(u64) {
121 ThrowNotImplemented(Opcode::IMADSP_cr);
122}
123
124void TranslatorVisitor::IMADSP_imm(u64) {
125 ThrowNotImplemented(Opcode::IMADSP_imm);
126}
127
128void TranslatorVisitor::IMUL_reg(u64) {
129 ThrowNotImplemented(Opcode::IMUL_reg);
130}
131
132void TranslatorVisitor::IMUL_cbuf(u64) {
133 ThrowNotImplemented(Opcode::IMUL_cbuf);
134}
135
136void TranslatorVisitor::IMUL_imm(u64) {
137 ThrowNotImplemented(Opcode::IMUL_imm);
138}
139
140void TranslatorVisitor::IMUL32I(u64) {
141 ThrowNotImplemented(Opcode::IMUL32I);
142}
143
144void TranslatorVisitor::JCAL(u64) {
145 ThrowNotImplemented(Opcode::JCAL);
146}
147
148void TranslatorVisitor::JMP(u64) {
149 ThrowNotImplemented(Opcode::JMP);
150}
151
152void TranslatorVisitor::KIL() {
153 // KIL is a no-op
154}
155
156void TranslatorVisitor::LD(u64) {
157 ThrowNotImplemented(Opcode::LD);
158}
159
160void TranslatorVisitor::LEPC(u64) {
161 ThrowNotImplemented(Opcode::LEPC);
162}
163
164void TranslatorVisitor::LONGJMP(u64) {
165 ThrowNotImplemented(Opcode::LONGJMP);
166}
167
168void TranslatorVisitor::NOP(u64) {
169    // NOP is a no-op
170}
171
172void TranslatorVisitor::PBK() {
173 // PBK is a no-op
174}
175
176void TranslatorVisitor::PCNT() {
177 // PCNT is a no-op
178}
179
180void TranslatorVisitor::PEXIT(u64) {
181 ThrowNotImplemented(Opcode::PEXIT);
182}
183
184void TranslatorVisitor::PLONGJMP(u64) {
185 ThrowNotImplemented(Opcode::PLONGJMP);
186}
187
188void TranslatorVisitor::PRET(u64) {
189 ThrowNotImplemented(Opcode::PRET);
190}
191
192void TranslatorVisitor::PRMT_reg(u64) {
193 ThrowNotImplemented(Opcode::PRMT_reg);
194}
195
196void TranslatorVisitor::PRMT_rc(u64) {
197 ThrowNotImplemented(Opcode::PRMT_rc);
198}
199
200void TranslatorVisitor::PRMT_cr(u64) {
201 ThrowNotImplemented(Opcode::PRMT_cr);
202}
203
204void TranslatorVisitor::PRMT_imm(u64) {
205 ThrowNotImplemented(Opcode::PRMT_imm);
206}
207
208void TranslatorVisitor::R2B(u64) {
209 ThrowNotImplemented(Opcode::R2B);
210}
211
212void TranslatorVisitor::RAM(u64) {
213 ThrowNotImplemented(Opcode::RAM);
214}
215
216void TranslatorVisitor::RET(u64) {
217 ThrowNotImplemented(Opcode::RET);
218}
219
220void TranslatorVisitor::RTT(u64) {
221 ThrowNotImplemented(Opcode::RTT);
222}
223
224void TranslatorVisitor::SAM(u64) {
225 ThrowNotImplemented(Opcode::SAM);
226}
227
228void TranslatorVisitor::SETCRSPTR(u64) {
229 ThrowNotImplemented(Opcode::SETCRSPTR);
230}
231
232void TranslatorVisitor::SETLMEMBASE(u64) {
233 ThrowNotImplemented(Opcode::SETLMEMBASE);
234}
235
236void TranslatorVisitor::SSY() {
237 // SSY is a no-op
238}
239
240void TranslatorVisitor::ST(u64) {
241 ThrowNotImplemented(Opcode::ST);
242}
243
244void TranslatorVisitor::STP(u64) {
245 ThrowNotImplemented(Opcode::STP);
246}
247
248void TranslatorVisitor::SUATOM_cas(u64) {
249 ThrowNotImplemented(Opcode::SUATOM_cas);
250}
251
252void TranslatorVisitor::SYNC(u64) {
253 ThrowNotImplemented(Opcode::SYNC);
254}
255
256void TranslatorVisitor::TXA(u64) {
257 ThrowNotImplemented(Opcode::TXA);
258}
259
260void TranslatorVisitor::VABSDIFF(u64) {
261 ThrowNotImplemented(Opcode::VABSDIFF);
262}
263
264void TranslatorVisitor::VABSDIFF4(u64) {
265 ThrowNotImplemented(Opcode::VABSDIFF4);
266}
267
268void TranslatorVisitor::VADD(u64) {
269 ThrowNotImplemented(Opcode::VADD);
270}
271
272void TranslatorVisitor::VSET(u64) {
273 ThrowNotImplemented(Opcode::VSET);
274}

275void TranslatorVisitor::VSHL(u64) {
276 ThrowNotImplemented(Opcode::VSHL);
277}
278
279void TranslatorVisitor::VSHR(u64) {
280 ThrowNotImplemented(Opcode::VSHR);
281}
282
283} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> output_reg; // Not needed on host
16 BitField<39, 1, u64> emit;
17 BitField<40, 1, u64> cut;
18 } const out{insn};
19
20 stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
21
22 if (out.emit != 0) {
23 v.ir.EmitVertex(stream_index);
24 }
25 if (out.cut != 0) {
26 v.ir.EndPrimitive(stream_index);
27 }
28 // Host doesn't need the output register, but we can write to it to avoid undefined reads
29 v.X(out.dest_reg, v.ir.Imm32(0));
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::OUT_reg(u64 insn) {
34 OUT(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::OUT_cbuf(u64 insn) {
38 OUT(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::OUT_imm(u64 insn) {
42 OUT(*this, insn, GetImm20(insn));
43}
44
45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 CovMask,
14 Covered,
15 Offset,
16 CentroidOffset,
17 MyIndex,
18};
19} // Anonymous namespace
20
21void TranslatorVisitor::PIXLD(u64 insn) {
22 union {
23 u64 raw;
24 BitField<31, 3, Mode> mode;
25 BitField<0, 8, IR::Reg> dest_reg;
26 BitField<8, 8, IR::Reg> addr_reg;
27 BitField<20, 8, s64> addr_offset;
28 BitField<45, 3, IR::Pred> dest_pred;
29 } const pixld{insn};
30
31 if (pixld.dest_pred != IR::Pred::PT) {
32 throw NotImplementedException("Destination predicate");
33 }
34 if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
35 throw NotImplementedException("Non-zero source register or offset");
36 }
37 switch (pixld.mode) {
38 case Mode::MyIndex:
39 X(pixld.dest_reg, ir.SampleId());
40 break;
41 default:
42 throw NotImplementedException("Mode {}", pixld.mode.Value());
43 }
44}
45
46} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSETP(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 3, IR::Pred> dest_pred_b;
15 BitField<3, 3, IR::Pred> dest_pred_a;
16 BitField<12, 3, IR::Pred> pred_a;
17 BitField<15, 1, u64> neg_pred_a;
18 BitField<24, 2, BooleanOp> bop_1;
19 BitField<29, 3, IR::Pred> pred_b;
20 BitField<32, 1, u64> neg_pred_b;
21 BitField<39, 3, IR::Pred> pred_c;
22 BitField<42, 1, u64> neg_pred_c;
23 BitField<45, 2, BooleanOp> bop_2;
24 } const pset{insn};
25
26 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
27 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
28 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
29
30 const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
31 const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
32 const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
33 const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
34
35 ir.SetPred(pset.dest_pred_a, result_a);
36 ir.SetPred(pset.dest_pred_b, result_b);
37}
38} // namespace Shader::Maxwell
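For reference, a minimal host-side sketch of the PSETP combine above: the two destination predicates receive (a bop_1 b) bop_2 c and ((!a) bop_1 b) bop_2 c. The BooleanOp values and inputs below are assumptions for illustration only; this models the logic, not the emitted IR.

#include <cstdio>

enum class BooleanOp { AND, OR, XOR };

bool Combine(bool a, bool b, BooleanOp op) {
    switch (op) {
    case BooleanOp::AND:
        return a && b;
    case BooleanOp::OR:
        return a || b;
    case BooleanOp::XOR:
        return a != b;
    }
    return false;
}

int main() {
    // Hypothetical predicate values and operations; in practice these come from the instruction.
    const bool pred_a = true;
    const bool pred_b = false;
    const bool pred_c = true;
    const BooleanOp bop_1 = BooleanOp::OR;
    const BooleanOp bop_2 = BooleanOp::AND;
    const bool result_a = Combine(Combine(pred_a, pred_b, bop_1), pred_c, bop_2);
    const bool result_b = Combine(Combine(!pred_a, pred_b, bop_1), pred_c, bop_2);
    std::printf("dest_pred_a=%d dest_pred_b=%d\n", result_a, result_b); // prints 1 and 0
    return 0;
}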
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSET(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<12, 3, IR::Pred> pred_a;
16 BitField<15, 1, u64> neg_pred_a;
17 BitField<24, 2, BooleanOp> bop_1;
18 BitField<29, 3, IR::Pred> pred_b;
19 BitField<32, 1, u64> neg_pred_b;
20 BitField<39, 3, IR::Pred> pred_c;
21 BitField<42, 1, u64> neg_pred_c;
22 BitField<44, 1, u64> bf;
23 BitField<45, 2, BooleanOp> bop_2;
24 BitField<47, 1, u64> cc;
25 } const pset{insn};
26
27 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
28 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
29 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
30
31 const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
32 const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
33
34 const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
35 const IR::U32 zero{ir.Imm32(0)};
36
37 const IR::U32 result{ir.Select(res_2, true_result, zero)};
38
39 X(pset.dest_reg, result);
40 if (pset.cc != 0) {
41 const IR::U1 is_zero{ir.IEqual(result, zero)};
42 SetZFlag(is_zero);
43 if (pset.bf != 0) {
44 ResetSFlag();
45 } else {
46 SetSFlag(ir.LogicalNot(is_zero));
47 }
48 ResetOFlag();
49 ResetCFlag();
50 }
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11
12void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 } const sel{insn};
20
21 const IR::U1 pred = v.ir.GetPred(sel.pred);
22 IR::U32 op_a{v.X(sel.src_reg)};
23 IR::U32 op_b{src};
24 if (sel.neg_pred != 0) {
25 std::swap(op_a, op_b);
26 }
27 const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
28
29 v.X(sel.dest_reg, result);
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::SEL_reg(u64 insn) {
34 SEL(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::SEL_cbuf(u64 insn) {
38 SEL(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::SEL_imm(u64 insn) {
42 SEL(*this, insn, GetImm20(insn));
43}
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24enum class Size : u64 {
25 U32,
26 S32,
27 U64,
28 S64,
29 F32FTZRN,
30 F16x2FTZRN,
31 SD32,
32 SD64,
33};
34
35enum class AtomicOp : u64 {
36 ADD,
37 MIN,
38 MAX,
39 INC,
40 DEC,
41 AND,
42 OR,
43 XOR,
44 EXCH,
45};
46
47enum class Clamp : u64 {
48 IGN,
49 Default,
50 TRAP,
51};
52
53TextureType GetType(Type type) {
54 switch (type) {
55 case Type::_1D:
56 return TextureType::Color1D;
57 case Type::BUFFER_1D:
58 return TextureType::Buffer;
59 case Type::ARRAY_1D:
60 return TextureType::ColorArray1D;
61 case Type::_2D:
62 return TextureType::Color2D;
63 case Type::ARRAY_2D:
64 return TextureType::ColorArray2D;
65 case Type::_3D:
66 return TextureType::Color3D;
67 }
68 throw NotImplementedException("Invalid type {}", type);
69}
70
71IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
72 switch (type) {
73 case Type::_1D:
74 case Type::BUFFER_1D:
75 return v.X(reg);
76 case Type::_2D:
77 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
78 case Type::_3D:
79 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
80 default:
81 break;
82 }
83 throw NotImplementedException("Invalid type {}", type);
84}
85
86IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
87 const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
88 bool is_signed) {
89 switch (op) {
90 case AtomicOp::ADD:
91 return ir.ImageAtomicIAdd(handle, coords, op_b, info);
92 case AtomicOp::MIN:
93 return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
94 case AtomicOp::MAX:
95 return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
96 case AtomicOp::INC:
97 return ir.ImageAtomicInc(handle, coords, op_b, info);
98 case AtomicOp::DEC:
99 return ir.ImageAtomicDec(handle, coords, op_b, info);
100 case AtomicOp::AND:
101 return ir.ImageAtomicAnd(handle, coords, op_b, info);
102 case AtomicOp::OR:
103 return ir.ImageAtomicOr(handle, coords, op_b, info);
104 case AtomicOp::XOR:
105 return ir.ImageAtomicXor(handle, coords, op_b, info);
106 case AtomicOp::EXCH:
107 return ir.ImageAtomicExchange(handle, coords, op_b, info);
108 default:
109 throw NotImplementedException("Atomic Operation {}", op);
110 }
111}
112
113ImageFormat Format(Size size) {
114 switch (size) {
115 case Size::U32:
116 case Size::S32:
117 case Size::SD32:
118 return ImageFormat::R32_UINT;
119 default:
120 break;
121 }
122 throw NotImplementedException("Invalid size {}", size);
123}
124
125bool IsSizeInt32(Size size) {
126 switch (size) {
127 case Size::U32:
128 case Size::S32:
129 case Size::SD32:
130 return true;
131 default:
132 return false;
133 }
134}
135
136void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
137 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
138 u64 bound_offset, bool is_bindless, bool write_result) {
139 if (clamp != Clamp::IGN) {
140 throw NotImplementedException("Clamp {}", clamp);
141 }
142 if (!IsSizeInt32(size)) {
143 throw NotImplementedException("Size {}", size);
144 }
145 const bool is_signed{size == Size::S32};
146 const ImageFormat format{Format(size)};
147 const TextureType tex_type{GetType(type)};
148 const IR::Value coords{MakeCoords(v, coord_reg, type)};
149
150 const IR::U32 handle{is_bindless ? v.X(bindless_reg)
151 : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
152 IR::TextureInstInfo info{};
153 info.type.Assign(tex_type);
154 info.image_format.Assign(format);
155
156 // TODO: float/64-bit operand
157 const IR::Value op_b{v.X(operand_reg)};
158 const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
159
160 if (write_result) {
161 v.X(dest_reg, IR::U32{color});
162 }
163}
164} // Anonymous namespace
165
166void TranslatorVisitor::SUATOM(u64 insn) {
167 union {
168 u64 raw;
169 BitField<54, 1, u64> is_bindless;
170 BitField<29, 4, AtomicOp> op;
171 BitField<33, 3, Type> type;
172 BitField<51, 3, Size> size;
173 BitField<49, 2, Clamp> clamp;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<8, 8, IR::Reg> coord_reg;
176 BitField<20, 8, IR::Reg> operand_reg;
177 BitField<36, 13, u64> bound_offset; // !is_bindless
178 BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
179 } const suatom{insn};
180
181 ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
182 suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
183 suatom.is_bindless != 0, true);
184}
185
186void TranslatorVisitor::SURED(u64 insn) {
187 // TODO: confirm offsets
188 union {
189 u64 raw;
190 BitField<51, 1, u64> is_bound;
191 BitField<21, 3, AtomicOp> op;
192 BitField<33, 3, Type> type;
193 BitField<20, 3, Size> size;
194 BitField<49, 2, Clamp> clamp;
195 BitField<0, 8, IR::Reg> operand_reg;
196 BitField<8, 8, IR::Reg> coord_reg;
197 BitField<36, 13, u64> bound_offset; // is_bound
198 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
199 } const sured{insn};
200 ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
201 sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
202 sured.is_bound == 0, false);
203}
204
205} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24constexpr unsigned R = 1 << 0;
25constexpr unsigned G = 1 << 1;
26constexpr unsigned B = 1 << 2;
27constexpr unsigned A = 1 << 3;
28
29constexpr std::array MASK{
30 0U, //
31 R, //
32 G, //
33 R | G, //
34 B, //
35 R | B, //
36 G | B, //
37 R | G | B, //
38 A, //
39 R | A, //
40 G | A, //
41 R | G | A, //
42 B | A, //
43 R | B | A, //
44 G | B | A, //
45 R | G | B | A, //
46};
47
48enum class Size : u64 {
49 U8,
50 S8,
51 U16,
52 S16,
53 B32,
54 B64,
55 B128,
56};
57
58enum class Clamp : u64 {
59 IGN,
60 Default,
61 TRAP,
62};
63
64// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
65enum class LoadCache : u64 {
66 CA, // Cache at all levels, likely to be accessed again
67 CG, // Cache at global level (L2 and below, not L1)
68 CI, // ???
69 CV, // Don't cache and fetch again (volatile)
70};
71
72enum class StoreCache : u64 {
73 WB, // Cache write-back all coherent levels
74 CG, // Cache at global level (L2 and below, not L1)
75 CS, // Cache streaming, likely to be accessed once
76 WT, // Cache write-through (to system memory, volatile?)
77};
78
79ImageFormat Format(Size size) {
80 switch (size) {
81 case Size::U8:
82 return ImageFormat::R8_UINT;
83 case Size::S8:
84 return ImageFormat::R8_SINT;
85 case Size::U16:
86 return ImageFormat::R16_UINT;
87 case Size::S16:
88 return ImageFormat::R16_SINT;
89 case Size::B32:
90 return ImageFormat::R32_UINT;
91 case Size::B64:
92 return ImageFormat::R32G32_UINT;
93 case Size::B128:
94 return ImageFormat::R32G32B32A32_UINT;
95 }
96 throw NotImplementedException("Invalid size {}", size);
97}
98
99int SizeInRegs(Size size) {
100 switch (size) {
101 case Size::U8:
102 case Size::S8:
103 case Size::U16:
104 case Size::S16:
105 case Size::B32:
106 return 1;
107 case Size::B64:
108 return 2;
109 case Size::B128:
110 return 4;
111 }
112 throw NotImplementedException("Invalid size {}", size);
113}
114
115TextureType GetType(Type type) {
116 switch (type) {
117 case Type::_1D:
118 return TextureType::Color1D;
119 case Type::BUFFER_1D:
120 return TextureType::Buffer;
121 case Type::ARRAY_1D:
122 return TextureType::ColorArray1D;
123 case Type::_2D:
124 return TextureType::Color2D;
125 case Type::ARRAY_2D:
126 return TextureType::ColorArray2D;
127 case Type::_3D:
128 return TextureType::Color3D;
129 }
130 throw NotImplementedException("Invalid type {}", type);
131}
132
133IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
134 const auto array{[&](int index) {
135 return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
136 }};
137 switch (type) {
138 case Type::_1D:
139 case Type::BUFFER_1D:
140 return v.X(reg);
141 case Type::ARRAY_1D:
142 return v.ir.CompositeConstruct(v.X(reg), array(1));
143 case Type::_2D:
144 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
145 case Type::ARRAY_2D:
146 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
147 case Type::_3D:
148 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
149 }
150 throw NotImplementedException("Invalid type {}", type);
151}
152
153unsigned SwizzleMask(u64 swizzle) {
154 if (swizzle == 0 || swizzle >= MASK.size()) {
155 throw NotImplementedException("Invalid swizzle {}", swizzle);
156 }
157 return MASK[swizzle];
158}
159
160IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
161 std::array<IR::U32, 4> colors;
162 for (int i = 0; i < num_regs; ++i) {
163 colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
164 }
165 for (int i = num_regs; i < 4; ++i) {
166 colors[static_cast<size_t>(i)] = ir.Imm32(0);
167 }
168 return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
169}
170} // Anonymous namespace
171
172void TranslatorVisitor::SULD(u64 insn) {
173 union {
174 u64 raw;
175 BitField<51, 1, u64> is_bound;
176 BitField<52, 1, u64> d;
177 BitField<23, 1, u64> ba;
178 BitField<33, 3, Type> type;
179 BitField<24, 2, LoadCache> cache;
180 BitField<20, 3, Size> size; // .D
181 BitField<20, 4, u64> swizzle; // .P
182 BitField<49, 2, Clamp> clamp;
183 BitField<0, 8, IR::Reg> dest_reg;
184 BitField<8, 8, IR::Reg> coord_reg;
185 BitField<36, 13, u64> bound_offset; // is_bound
186 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
187 } const suld{insn};
188
189 if (suld.clamp != Clamp::IGN) {
190 throw NotImplementedException("Clamp {}", suld.clamp.Value());
191 }
192 if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
193 throw NotImplementedException("Cache {}", suld.cache.Value());
194 }
195 const bool is_typed{suld.d != 0};
196 if (is_typed && suld.ba != 0) {
197 throw NotImplementedException("BA");
198 }
199
200 const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
201 const TextureType type{GetType(suld.type)};
202 const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
203 const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
204 : X(suld.bindless_reg)};
205 IR::TextureInstInfo info{};
206 info.type.Assign(type);
207 info.image_format.Assign(format);
208
209 const IR::Value result{ir.ImageRead(handle, coords, info)};
210 IR::Reg dest_reg{suld.dest_reg};
211 if (is_typed) {
212 const int num_regs{SizeInRegs(suld.size)};
213 for (int i = 0; i < num_regs; ++i) {
214 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
215 }
216 } else {
217 const unsigned mask{SwizzleMask(suld.swizzle)};
218 const int bits{std::popcount(mask)};
219 if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
220 throw NotImplementedException("Unaligned destination register");
221 }
222 for (unsigned component = 0; component < 4; ++component) {
223 if (((mask >> component) & 1) == 0) {
224 continue;
225 }
226 X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
227 ++dest_reg;
228 }
229 }
230}
231
232void TranslatorVisitor::SUST(u64 insn) {
233 union {
234 u64 raw;
235 BitField<51, 1, u64> is_bound;
236 BitField<52, 1, u64> d;
237 BitField<23, 1, u64> ba;
238 BitField<33, 3, Type> type;
239 BitField<24, 2, StoreCache> cache;
240 BitField<20, 3, Size> size; // .D
241 BitField<20, 4, u64> swizzle; // .P
242 BitField<49, 2, Clamp> clamp;
243 BitField<0, 8, IR::Reg> data_reg;
244 BitField<8, 8, IR::Reg> coord_reg;
245 BitField<36, 13, u64> bound_offset; // is_bound
246 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
247 } const sust{insn};
248
249 if (sust.clamp != Clamp::IGN) {
250 throw NotImplementedException("Clamp {}", sust.clamp.Value());
251 }
252 if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
253 throw NotImplementedException("Cache {}", sust.cache.Value());
254 }
255 const bool is_typed{sust.d != 0};
256 if (is_typed && sust.ba != 0) {
257 throw NotImplementedException("BA");
258 }
259 const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
260 const TextureType type{GetType(sust.type)};
261 const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
262 const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
263 : X(sust.bindless_reg)};
264 IR::TextureInstInfo info{};
265 info.type.Assign(type);
266 info.image_format.Assign(format);
267
268 IR::Value color;
269 if (is_typed) {
270 color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
271 } else {
272 const unsigned mask{SwizzleMask(sust.swizzle)};
273 if (mask != 0xf) {
274 throw NotImplementedException("Non-full mask");
275 }
276 color = MakeColor(ir, sust.data_reg, 4);
277 }
278 ir.ImageWrite(handle, coords, color, info);
279}
280
281} // namespace Shader::Maxwell
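As an aside, a small self-contained sketch of the untyped (.P) swizzle handling above: the 4-bit swizzle indexes the MASK table, and the popcount of the resulting mask is the number of consecutive destination registers written. The swizzle value below is a hypothetical example, not taken from a real instruction.

#include <array>
#include <bit>
#include <cstdio>

int main() {
    constexpr unsigned R = 1 << 0;
    constexpr unsigned G = 1 << 1;
    constexpr unsigned B = 1 << 2;
    constexpr unsigned A = 1 << 3;
    constexpr std::array MASK{
        0U, R, G, R | G, B, R | B, G | B, R | G | B,
        A,  R | A, G | A, R | G | A, B | A, R | B | A, G | B | A, R | G | B | A,
    };
    const unsigned swizzle = 0b0111; // hypothetical .P encoding selecting R, G and B
    const unsigned mask = MASK[swizzle];
    // Three bits set, so three consecutive destination registers receive components.
    std::printf("mask=0x%x components=%d\n", mask, std::popcount(mask));
    return 0;
}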
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Blod : u64 {
15 None,
16 LZ,
17 LB,
18 LL,
19 INVALIDBLOD4,
20 INVALIDBLOD5,
21 LBA,
22 LLA,
23};
24
25enum class TextureType : u64 {
26 _1D,
27 ARRAY_1D,
28 _2D,
29 ARRAY_2D,
30 _3D,
31 ARRAY_3D,
32 CUBE,
33 ARRAY_CUBE,
34};
35
36Shader::TextureType GetType(TextureType type) {
37 switch (type) {
38 case TextureType::_1D:
39 return Shader::TextureType::Color1D;
40 case TextureType::ARRAY_1D:
41 return Shader::TextureType::ColorArray1D;
42 case TextureType::_2D:
43 return Shader::TextureType::Color2D;
44 case TextureType::ARRAY_2D:
45 return Shader::TextureType::ColorArray2D;
46 case TextureType::_3D:
47 return Shader::TextureType::Color3D;
48 case TextureType::ARRAY_3D:
49 throw NotImplementedException("3D array texture type");
50 case TextureType::CUBE:
51 return Shader::TextureType::ColorCube;
52 case TextureType::ARRAY_CUBE:
53 return Shader::TextureType::ColorArrayCube;
54 }
55 throw NotImplementedException("Invalid texture type {}", type);
56}
57
58IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
59 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
60 switch (type) {
61 case TextureType::_1D:
62 return v.F(reg);
63 case TextureType::ARRAY_1D:
64 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
65 case TextureType::_2D:
66 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
67 case TextureType::ARRAY_2D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
69 case TextureType::_3D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
71 case TextureType::ARRAY_3D:
72 throw NotImplementedException("3D array texture type");
73 case TextureType::CUBE:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_CUBE:
76 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
77 }
78 throw NotImplementedException("Invalid texture type {}", type);
79}
80
81IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
82 switch (blod) {
83 case Blod::None:
84 return v.ir.Imm32(0.0f);
85 case Blod::LZ:
86 return v.ir.Imm32(0.0f);
87 case Blod::LB:
88 case Blod::LL:
89 case Blod::LBA:
90 case Blod::LLA:
91 return v.F(reg++);
92 case Blod::INVALIDBLOD4:
93 case Blod::INVALIDBLOD5:
94 break;
95 }
96 throw NotImplementedException("Invalid blod {}", blod);
97}
98
99IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
100 const IR::U32 value{v.X(reg++)};
101 switch (type) {
102 case TextureType::_1D:
103 case TextureType::ARRAY_1D:
104 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
105 case TextureType::_2D:
106 case TextureType::ARRAY_2D:
107 return v.ir.CompositeConstruct(
108 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
109 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
110 case TextureType::_3D:
111 case TextureType::ARRAY_3D:
112 return v.ir.CompositeConstruct(
113 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
114 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
116 case TextureType::CUBE:
117 case TextureType::ARRAY_CUBE:
118 throw NotImplementedException("Illegal offset on CUBE sample");
119 }
120 throw NotImplementedException("Invalid texture type {}", type);
121}
122
123bool HasExplicitLod(Blod blod) {
124 switch (blod) {
125 case Blod::LL:
126 case Blod::LLA:
127 case Blod::LZ:
128 return true;
129 default:
130 return false;
131 }
132}
133
134void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
135 std::optional<u32> cbuf_offset) {
136 union {
137 u64 raw;
138 BitField<35, 1, u64> ndv;
139 BitField<49, 1, u64> nodep;
140 BitField<50, 1, u64> dc;
141 BitField<51, 3, IR::Pred> sparse_pred;
142 BitField<0, 8, IR::Reg> dest_reg;
143 BitField<8, 8, IR::Reg> coord_reg;
144 BitField<20, 8, IR::Reg> meta_reg;
145 BitField<28, 3, TextureType> type;
146 BitField<31, 4, u64> mask;
147 } const tex{insn};
148
149 if (lc) {
150 throw NotImplementedException("LC");
151 }
152 const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
153
154 IR::Reg meta_reg{tex.meta_reg};
155 IR::Value handle;
156 IR::Value offset;
157 IR::F32 dref;
158 IR::F32 lod_clamp;
159 if (cbuf_offset) {
160 handle = v.ir.Imm32(*cbuf_offset);
161 } else {
162 handle = v.X(meta_reg++);
163 }
164 const IR::F32 lod{MakeLod(v, meta_reg, blod)};
165 if (aoffi) {
166 offset = MakeOffset(v, meta_reg, tex.type);
167 }
168 if (tex.dc != 0) {
169 dref = v.F(meta_reg++);
170 }
171 IR::TextureInstInfo info{};
172 info.type.Assign(GetType(tex.type));
173 info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
174 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
175 info.has_lod_clamp.Assign(lc ? 1 : 0);
176
177 const IR::Value sample{[&]() -> IR::Value {
178 if (tex.dc == 0) {
179 if (HasExplicitLod(blod)) {
180 return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
181 } else {
182 return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
183 }
184 }
185 if (HasExplicitLod(blod)) {
186 return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
187 } else {
188 return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
189 info);
190 }
191 }()};
192
193 IR::Reg dest_reg{tex.dest_reg};
194 for (int element = 0; element < 4; ++element) {
195 if (((tex.mask >> element) & 1) == 0) {
196 continue;
197 }
198 IR::F32 value;
199 if (tex.dc != 0) {
200 value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
201 } else {
202 value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
203 }
204 v.F(dest_reg, value);
205 ++dest_reg;
206 }
207 if (tex.sparse_pred != IR::Pred::PT) {
208 v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
209 }
210}
211} // Anonymous namespace
212
213void TranslatorVisitor::TEX(u64 insn) {
214 union {
215 u64 raw;
216 BitField<54, 1, u64> aoffi;
217 BitField<55, 3, Blod> blod;
218 BitField<58, 1, u64> lc;
219 BitField<36, 13, u64> cbuf_offset;
220 } const tex{insn};
221
222 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
223}
224
225void TranslatorVisitor::TEX_b(u64 insn) {
226 union {
227 u64 raw;
228 BitField<36, 1, u64> aoffi;
229 BitField<37, 3, Blod> blod;
230 BitField<40, 1, u64> lc;
231 } const tex{insn};
232
233 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
234}
235
236} // namespace Shader::Maxwell
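For illustration, a minimal sketch of the AOFFI decoding that MakeOffset performs above: each texel offset is a signed 4-bit field packed into a single source register. The packed value below is hypothetical; this only demonstrates the extraction, not the IR path.

#include <cstdio>

// Extract a signed 4-bit field starting at bit position pos.
int SignedField4(unsigned packed, unsigned pos) {
    const unsigned nibble = (packed >> pos) & 0xF;
    return (nibble & 0x8) != 0 ? static_cast<int>(nibble) - 16 : static_cast<int>(nibble);
}

int main() {
    const unsigned packed = 0xF2; // hypothetical register value: x offset = 2, y offset = -1
    std::printf("x=%d y=%d\n", SignedField4(packed, 0), SignedField4(packed, 4));
    return 0;
}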
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19union Encoding {
20 u64 raw;
21 BitField<59, 1, Precision> precision;
22 BitField<53, 4, u64> encoding;
23 BitField<49, 1, u64> nodep;
24 BitField<28, 8, IR::Reg> dest_reg_b;
25 BitField<0, 8, IR::Reg> dest_reg_a;
26 BitField<8, 8, IR::Reg> src_reg_a;
27 BitField<20, 8, IR::Reg> src_reg_b;
28 BitField<36, 13, u64> cbuf_offset;
29 BitField<50, 3, u64> swizzle;
30};
31
32constexpr unsigned R = 1;
33constexpr unsigned G = 2;
34constexpr unsigned B = 4;
35constexpr unsigned A = 8;
36
37constexpr std::array RG_LUT{
38 R, //
39 G, //
40 B, //
41 A, //
42 R | G, //
43 R | A, //
44 G | A, //
45 B | A, //
46};
47
48constexpr std::array RGBA_LUT{
49 R | G | B, //
50 R | G | A, //
51 R | B | A, //
52 G | B | A, //
53 R | G | B | A, //
54};
55
56void CheckAlignment(IR::Reg reg, size_t alignment) {
57 if (!IR::IsAligned(reg, alignment)) {
58 throw NotImplementedException("Unaligned source register {}", reg);
59 }
60}
61
62template <typename... Args>
63IR::Value Composite(TranslatorVisitor& v, Args... regs) {
64 return v.ir.CompositeConstruct(v.F(regs)...);
65}
66
67IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
68 return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding texs{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
74 const IR::F32 zero{v.ir.Imm32(0.0f)};
75 const IR::Reg reg_a{texs.src_reg_a};
76 const IR::Reg reg_b{texs.src_reg_b};
77 IR::TextureInstInfo info{};
78 if (texs.precision == Precision::F16) {
79 info.relaxed_precision.Assign(1);
80 }
81 switch (texs.encoding) {
82 case 0: // 1D.LZ
83 info.type.Assign(TextureType::Color1D);
84 return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
85 case 1: // 2D
86 info.type.Assign(TextureType::Color2D);
87 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
88 case 2: // 2D.LZ
89 info.type.Assign(TextureType::Color2D);
90 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
91 case 3: // 2D.LL
92 CheckAlignment(reg_a, 2);
93 info.type.Assign(TextureType::Color2D);
94 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
95 info);
96 case 4: // 2D.DC
97 CheckAlignment(reg_a, 2);
98 info.type.Assign(TextureType::Color2D);
99 info.is_depth.Assign(1);
100 return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
101 {}, {}, {}, info);
102 case 5: // 2D.LL.DC
103 CheckAlignment(reg_a, 2);
104 CheckAlignment(reg_b, 2);
105 info.type.Assign(TextureType::Color2D);
106 info.is_depth.Assign(1);
107 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
108 v.F(reg_b + 1), v.F(reg_b), {}, info);
109 case 6: // 2D.LZ.DC
110 CheckAlignment(reg_a, 2);
111 info.type.Assign(TextureType::Color2D);
112 info.is_depth.Assign(1);
113 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
114 zero, {}, info);
115 case 7: // ARRAY_2D
116 CheckAlignment(reg_a, 2);
117 info.type.Assign(TextureType::ColorArray2D);
118 return v.ir.ImageSampleImplicitLod(
119 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
120 {}, {}, {}, info);
121 case 8: // ARRAY_2D.LZ
122 CheckAlignment(reg_a, 2);
123 info.type.Assign(TextureType::ColorArray2D);
124 return v.ir.ImageSampleExplicitLod(
125 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
126 zero, {}, info);
127 case 9: // ARRAY_2D.LZ.DC
128 CheckAlignment(reg_a, 2);
129 CheckAlignment(reg_b, 2);
130 info.type.Assign(TextureType::ColorArray2D);
131 info.is_depth.Assign(1);
132 return v.ir.ImageSampleDrefExplicitLod(
133 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
134 v.F(reg_b + 1), zero, {}, info);
135 case 10: // 3D
136 CheckAlignment(reg_a, 2);
137 info.type.Assign(TextureType::Color3D);
138 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
139 {}, info);
140 case 11: // 3D.LZ
141 CheckAlignment(reg_a, 2);
142 info.type.Assign(TextureType::Color3D);
143 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
144 info);
145 case 12: // CUBE
146 CheckAlignment(reg_a, 2);
147 info.type.Assign(TextureType::ColorCube);
148 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
149 {}, info);
150 case 13: // CUBE.LL
151 CheckAlignment(reg_a, 2);
152 CheckAlignment(reg_b, 2);
153 info.type.Assign(TextureType::ColorCube);
154 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
155 v.F(reg_b + 1), {}, info);
156 default:
157 throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
158 }
159}
160
161unsigned Swizzle(u64 insn) {
162 const Encoding texs{insn};
163 const size_t encoding{texs.swizzle};
164 if (texs.dest_reg_b == IR::Reg::RZ) {
165 if (encoding >= RG_LUT.size()) {
166 throw NotImplementedException("Illegal RG encoding {}", encoding);
167 }
168 return RG_LUT[encoding];
169 } else {
170 if (encoding >= RGBA_LUT.size()) {
171 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
172 }
173 return RGBA_LUT[encoding];
174 }
175}
176
177IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
178 const bool is_shadow{sample.Type() == IR::Type::F32};
179 if (is_shadow) {
180 const bool is_alpha{component == 3};
181 return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
182 } else {
183 return IR::F32{v.ir.CompositeExtract(sample, component)};
184 }
185}
186
187IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
188 const Encoding texs{insn};
189 switch (index) {
190 case 0:
191 return texs.dest_reg_a;
192 case 1:
193 CheckAlignment(texs.dest_reg_a, 2);
194 return texs.dest_reg_a + 1;
195 case 2:
196 return texs.dest_reg_b;
197 case 3:
198 CheckAlignment(texs.dest_reg_b, 2);
199 return texs.dest_reg_b + 1;
200 }
201 throw LogicError("Invalid store index {}", index);
202}
203
204void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
205 const unsigned swizzle{Swizzle(insn)};
206 unsigned store_index{0};
207 for (unsigned component = 0; component < 4; ++component) {
208 if (((swizzle >> component) & 1) == 0) {
209 continue;
210 }
211 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
212 v.F(dest, Extract(v, sample, component));
213 ++store_index;
214 }
215}
216
217IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
218 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
219}
220
221void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
222 const unsigned swizzle{Swizzle(insn)};
223 unsigned store_index{0};
224 std::array<IR::F32, 4> swizzled;
225 for (unsigned component = 0; component < 4; ++component) {
226 if (((swizzle >> component) & 1) == 0) {
227 continue;
228 }
229 swizzled[store_index] = Extract(v, sample, component);
230 ++store_index;
231 }
232 const IR::F32 zero{v.ir.Imm32(0.0f)};
233 const Encoding texs{insn};
234 switch (store_index) {
235 case 1:
236 v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
237 break;
238 case 2:
239 case 3:
240 case 4:
241 v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
242 switch (store_index) {
243 case 2:
244 break;
245 case 3:
246 v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
247 break;
248 case 4:
249 v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
250 break;
251 }
252 break;
253 }
254}
255} // Anonymous namespace
256
257void TranslatorVisitor::TEXS(u64 insn) {
258 const IR::Value sample{Sample(*this, insn)};
259 if (Encoding{insn}.precision == Precision::F32) {
260 Store32(*this, insn, sample);
261 } else {
262 Store16(*this, insn, sample);
263 }
264}
265
266} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26enum class OffsetType : u64 {
27 None = 0,
28 AOFFI,
29 PTP,
30 Invalid,
31};
32
33enum class ComponentType : u64 {
34 R = 0,
35 G = 1,
36 B = 2,
37 A = 3,
38};
39
40Shader::TextureType GetType(TextureType type) {
41 switch (type) {
42 case TextureType::_1D:
43 return Shader::TextureType::Color1D;
44 case TextureType::ARRAY_1D:
45 return Shader::TextureType::ColorArray1D;
46 case TextureType::_2D:
47 return Shader::TextureType::Color2D;
48 case TextureType::ARRAY_2D:
49 return Shader::TextureType::ColorArray2D;
50 case TextureType::_3D:
51 return Shader::TextureType::Color3D;
52 case TextureType::ARRAY_3D:
53 throw NotImplementedException("3D array texture type");
54 case TextureType::CUBE:
55 return Shader::TextureType::ColorCube;
56 case TextureType::ARRAY_CUBE:
57 return Shader::TextureType::ColorArrayCube;
58 }
59 throw NotImplementedException("Invalid texture type {}", type);
60}
61
62IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
63 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
64 switch (type) {
65 case TextureType::_1D:
66 return v.F(reg);
67 case TextureType::ARRAY_1D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
69 case TextureType::_2D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
71 case TextureType::ARRAY_2D:
72 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
73 case TextureType::_3D:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_3D:
76 throw NotImplementedException("3D array texture type");
77 case TextureType::CUBE:
78 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
79 case TextureType::ARRAY_CUBE:
80 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
81 }
82 throw NotImplementedException("Invalid texture type {}", type);
83}
84
85IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
86 const IR::U32 value{v.X(reg++)};
87 switch (type) {
88 case TextureType::_1D:
89 case TextureType::ARRAY_1D:
90 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
91 case TextureType::_2D:
92 case TextureType::ARRAY_2D:
93 return v.ir.CompositeConstruct(
94 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
95 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
96 case TextureType::_3D:
97 case TextureType::ARRAY_3D:
98 return v.ir.CompositeConstruct(
99 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
100 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
101 v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
102 case TextureType::CUBE:
103 case TextureType::ARRAY_CUBE:
104 throw NotImplementedException("Illegal offset on CUBE sample");
105 }
106 throw NotImplementedException("Invalid texture type {}", type);
107}
108
109std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
110 const IR::U32 value1{v.X(reg++)};
111 const IR::U32 value2{v.X(reg++)};
112 const IR::U32 bitsize{v.ir.Imm32(6)};
113 const auto make_vector{[&v, &bitsize](const IR::U32& value) {
114 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
116 v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
117 v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
118 }};
119 return {make_vector(value1), make_vector(value2)};
120}
121
122void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
123 bool is_bindless) {
124 union {
125 u64 raw;
126 BitField<35, 1, u64> ndv;
127 BitField<49, 1, u64> nodep;
128 BitField<50, 1, u64> dc;
129 BitField<51, 3, IR::Pred> sparse_pred;
130 BitField<0, 8, IR::Reg> dest_reg;
131 BitField<8, 8, IR::Reg> coord_reg;
132 BitField<20, 8, IR::Reg> meta_reg;
133 BitField<28, 3, TextureType> type;
134 BitField<31, 4, u64> mask;
135 BitField<36, 13, u64> cbuf_offset;
136 } const tld4{insn};
137
138 const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
139
140 IR::Reg meta_reg{tld4.meta_reg};
141 IR::Value handle;
142 IR::Value offset;
143 IR::Value offset2;
144 IR::F32 dref;
145 if (!is_bindless) {
146 handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
147 } else {
148 handle = v.X(meta_reg++);
149 }
150 switch (offset_type) {
151 case OffsetType::None:
152 break;
153 case OffsetType::AOFFI:
154 offset = MakeOffset(v, meta_reg, tld4.type);
155 break;
156 case OffsetType::PTP:
157 std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
158 break;
159 default:
160 throw NotImplementedException("Invalid offset type {}", offset_type);
161 }
162 if (tld4.dc != 0) {
163 dref = v.F(meta_reg++);
164 }
165 IR::TextureInstInfo info{};
166 info.type.Assign(GetType(tld4.type));
167 info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
168 info.gather_component.Assign(static_cast<u32>(component_type));
169 const IR::Value sample{[&] {
170 if (tld4.dc == 0) {
171 return v.ir.ImageGather(handle, coords, offset, offset2, info);
172 }
173 return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
174 }()};
175
176 IR::Reg dest_reg{tld4.dest_reg};
177 for (size_t element = 0; element < 4; ++element) {
178 if (((tld4.mask >> element) & 1) == 0) {
179 continue;
180 }
181 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
182 ++dest_reg;
183 }
184 if (tld4.sparse_pred != IR::Pred::PT) {
185 v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
186 }
187}
188} // Anonymous namespace
189
190void TranslatorVisitor::TLD4(u64 insn) {
191 union {
192 u64 raw;
193 BitField<56, 2, ComponentType> component;
194 BitField<54, 2, OffsetType> offset;
195 } const tld4{insn};
196 Impl(*this, insn, tld4.component, tld4.offset, false);
197}
198
199void TranslatorVisitor::TLD4_b(u64 insn) {
200 union {
201 u64 raw;
202 BitField<38, 2, ComponentType> component;
203 BitField<36, 2, OffsetType> offset;
204 } const tld4{insn};
205 Impl(*this, insn, tld4.component, tld4.offset, true);
206}
207
208} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F32,
16 F16,
17};
18
19enum class ComponentType : u64 {
20 R = 0,
21 G = 1,
22 B = 2,
23 A = 3,
24};
25
26union Encoding {
27 u64 raw;
28 BitField<55, 1, Precision> precision;
29 BitField<52, 2, ComponentType> component_type;
30 BitField<51, 1, u64> aoffi;
31 BitField<50, 1, u64> dc;
32 BitField<49, 1, u64> nodep;
33 BitField<28, 8, IR::Reg> dest_reg_b;
34 BitField<0, 8, IR::Reg> dest_reg_a;
35 BitField<8, 8, IR::Reg> src_reg_a;
36 BitField<20, 8, IR::Reg> src_reg_b;
37 BitField<36, 13, u64> cbuf_offset;
38};
39
40void CheckAlignment(IR::Reg reg, size_t alignment) {
41 if (!IR::IsAligned(reg, alignment)) {
42 throw NotImplementedException("Unaligned source register {}", reg);
43 }
44}
45
46IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
47 const IR::U32 value{v.X(reg)};
48 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
49 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
50}
51
52IR::Value Sample(TranslatorVisitor& v, u64 insn) {
53 const Encoding tld4s{insn};
54 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
55 const IR::Reg reg_a{tld4s.src_reg_a};
56 const IR::Reg reg_b{tld4s.src_reg_b};
57 IR::TextureInstInfo info{};
58 if (tld4s.precision == Precision::F16) {
59 info.relaxed_precision.Assign(1);
60 }
61 info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
62 info.type.Assign(Shader::TextureType::Color2D);
63 info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
64 IR::Value coords;
65 if (tld4s.aoffi != 0) {
66 CheckAlignment(reg_a, 2);
67 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
68 IR::Value offset = MakeOffset(v, reg_b);
69 if (tld4s.dc != 0) {
70 CheckAlignment(reg_b, 2);
71 IR::F32 dref = v.F(reg_b + 1);
72 return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
73 }
74 return v.ir.ImageGather(handle, coords, offset, {}, info);
75 }
76 if (tld4s.dc != 0) {
77 CheckAlignment(reg_a, 2);
78 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
79 IR::F32 dref = v.F(reg_b);
80 return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
81 }
82 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
83 return v.ir.ImageGather(handle, coords, {}, {}, info);
84}
85
86IR::Reg RegStoreComponent32(u64 insn, size_t index) {
87 const Encoding tlds4{insn};
88 switch (index) {
89 case 0:
90 return tlds4.dest_reg_a;
91 case 1:
92 CheckAlignment(tlds4.dest_reg_a, 2);
93 return tlds4.dest_reg_a + 1;
94 case 2:
95 return tlds4.dest_reg_b;
96 case 3:
97 CheckAlignment(tlds4.dest_reg_b, 2);
98 return tlds4.dest_reg_b + 1;
99 }
100 throw LogicError("Invalid store index {}", index);
101}
102
103void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
104 for (size_t component = 0; component < 4; ++component) {
105 const IR::Reg dest{RegStoreComponent32(insn, component)};
106 v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
107 }
108}
109
110IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
111 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
112}
113
114void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
115 std::array<IR::F32, 4> swizzled;
116 for (size_t component = 0; component < 4; ++component) {
117 swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
118 }
119 const Encoding tld4s{insn};
120 v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
121 v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
122}
123} // Anonymous namespace
124
125void TranslatorVisitor::TLD4S(u64 insn) {
126 const IR::Value sample{Sample(*this, insn)};
127 if (Encoding{insn}.precision == Precision::F32) {
128 Store32(*this, insn, sample);
129 } else {
130 Store16(*this, insn, sample);
131 }
132}
133
134} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
49 const IR::U32 value{v.X(reg)};
50 const u32 base{has_lod_clamp ? 12U : 16U};
51 return v.ir.CompositeConstruct(
52 v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
53 v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
54}
55
56void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
57 union {
58 u64 raw;
59 BitField<49, 1, u64> nodep;
60 BitField<35, 1, u64> aoffi;
61 BitField<50, 1, u64> lc;
62 BitField<51, 3, IR::Pred> sparse_pred;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> coord_reg;
65 BitField<20, 8, IR::Reg> derivate_reg;
66 BitField<28, 3, TextureType> type;
67 BitField<31, 4, u64> mask;
68 BitField<36, 13, u64> cbuf_offset;
69 } const txd{insn};
70
71 const bool has_lod_clamp = txd.lc != 0;
72 if (has_lod_clamp) {
73 throw NotImplementedException("TXD.LC - CLAMP is not implemented");
74 }
75
76 IR::Value coords;
77 u32 num_derivates{};
78 IR::Reg base_reg{txd.coord_reg};
79 IR::Reg last_reg;
80 IR::Value handle;
81 if (is_bindless) {
82 handle = v.X(base_reg++);
83 } else {
84 handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
85 }
86
87 const auto read_array{[&]() -> IR::F32 {
88 const IR::U32 base{v.ir.Imm32(0)};
89 const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
90 const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
91 return v.ir.ConvertUToF(32, 16, array_index);
92 }};
93 switch (txd.type) {
94 case TextureType::_1D: {
95 coords = v.F(base_reg);
96 num_derivates = 1;
97 last_reg = base_reg + 1;
98 break;
99 }
100 case TextureType::ARRAY_1D: {
101 last_reg = base_reg + 1;
102 coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
103 num_derivates = 1;
104 break;
105 }
106 case TextureType::_2D: {
107 last_reg = base_reg + 2;
108 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
109 num_derivates = 2;
110 break;
111 }
112 case TextureType::ARRAY_2D: {
113 last_reg = base_reg + 2;
114 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
115 num_derivates = 2;
116 break;
117 }
118 default:
119 throw NotImplementedException("Invalid texture type");
120 }
121
122 const IR::Reg derivate_reg{txd.derivate_reg};
123 IR::Value derivates;
124 switch (num_derivates) {
125 case 1: {
126 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
127 break;
128 }
129 case 2: {
130 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
131 v.F(derivate_reg + 2), v.F(derivate_reg + 3));
132 break;
133 }
134 default:
135 throw NotImplementedException("Invalid number of derivatives");
136 }
137
138 IR::Value offset;
139 if (txd.aoffi != 0) {
140 offset = MakeOffset(v, last_reg, has_lod_clamp);
141 }
142
143 IR::F32 lod_clamp;
144 if (has_lod_clamp) {
145 // The LOD clamp is a 4.8 fixed-point value that needs to be converted to float.
146 // To convert a fixed-point value: float(value) / float(1 << fraction_bits),
147 // where fraction_bits is 8 in this case.
148 const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
149 const IR::F32 fixp_lc{v.ir.ConvertUToF(
150 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
151 lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
152 }
153
154 IR::TextureInstInfo info{};
155 info.type.Assign(GetType(txd.type));
156 info.num_derivates.Assign(num_derivates);
157 info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
158 const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
159
160 IR::Reg dest_reg{txd.dest_reg};
161 for (size_t element = 0; element < 4; ++element) {
162 if (((txd.mask >> element) & 1) == 0) {
163 continue;
164 }
165 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
166 ++dest_reg;
167 }
168 if (txd.sparse_pred != IR::Pred::PT) {
169 v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
170 }
171}
172} // Anonymous namespace
173
174void TranslatorVisitor::TXD(u64 insn) {
175 Impl(*this, insn, false);
176}
177
178void TranslatorVisitor::TXD_b(u64 insn) {
179 Impl(*this, insn, true);
180}
181
182} // namespace Shader::Maxwell
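As a worked example of the 4.8 fixed-point formula referenced in the TXD comment above (this only illustrates the conversion formula itself, not the IR the translator emits):

#include <cstdio>

int main() {
    const unsigned raw = 0x180; // hypothetical 4.8 fixed-point value: integer 1, fraction 128/256
    const float converted = static_cast<float>(raw) / static_cast<float>(1u << 8);
    std::printf("%f\n", converted); // 1.500000
    return 0;
}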
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 const auto read_array{
50 [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
51 switch (type) {
52 case TextureType::_1D:
53 return v.X(reg);
54 case TextureType::ARRAY_1D:
55 return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
73 const IR::U32 value{v.X(reg++)};
74 switch (type) {
75 case TextureType::_1D:
76 case TextureType::ARRAY_1D:
77 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
78 case TextureType::_2D:
79 case TextureType::ARRAY_2D:
80 return v.ir.CompositeConstruct(
81 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
82 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
83 case TextureType::_3D:
84 case TextureType::ARRAY_3D:
85 return v.ir.CompositeConstruct(
86 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
87 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
88 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
89 case TextureType::CUBE:
90 case TextureType::ARRAY_CUBE:
91 throw NotImplementedException("Illegal offset on CUBE sample");
92 }
93 throw NotImplementedException("Invalid texture type {}", type);
94}
95
96void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
97 union {
98 u64 raw;
99 BitField<49, 1, u64> nodep;
100 BitField<55, 1, u64> lod;
101 BitField<50, 1, u64> multisample;
102 BitField<35, 1, u64> aoffi;
103 BitField<54, 1, u64> clamp;
104 BitField<51, 3, IR::Pred> sparse_pred;
105 BitField<0, 8, IR::Reg> dest_reg;
106 BitField<8, 8, IR::Reg> coord_reg;
107 BitField<20, 8, IR::Reg> meta_reg;
108 BitField<28, 3, TextureType> type;
109 BitField<31, 4, u64> mask;
110 BitField<36, 13, u64> cbuf_offset;
111 } const tld{insn};
112
113 const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
114
115 IR::Reg meta_reg{tld.meta_reg};
116 IR::Value handle;
117 IR::Value offset;
118 IR::U32 lod;
119 IR::U32 multisample;
120 if (is_bindless) {
121 handle = v.X(meta_reg++);
122 } else {
123 handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
124 }
125 if (tld.lod != 0) {
126 lod = v.X(meta_reg++);
127 } else {
128 lod = v.ir.Imm32(0U);
129 }
130 if (tld.aoffi != 0) {
131 offset = MakeOffset(v, meta_reg, tld.type);
132 }
133 if (tld.multisample != 0) {
134 multisample = v.X(meta_reg++);
135 }
136 if (tld.clamp != 0) {
137        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
138 }
139 IR::TextureInstInfo info{};
140 info.type.Assign(GetType(tld.type));
141 const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
142
143 IR::Reg dest_reg{tld.dest_reg};
144 for (size_t element = 0; element < 4; ++element) {
145 if (((tld.mask >> element) & 1) == 0) {
146 continue;
147 }
148 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
149 ++dest_reg;
150 }
151 if (tld.sparse_pred != IR::Pred::PT) {
152 v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
153 }
154}
155} // Anonymous namespace
156
157void TranslatorVisitor::TLD(u64 insn) {
158 Impl(*this, insn, false);
159}
160
161void TranslatorVisitor::TLD_b(u64 insn) {
162 Impl(*this, insn, true);
163}
164
165} // namespace Shader::Maxwell
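
MakeOffset above sign-extends consecutive 4-bit fields of one source register to build the AOFFI texel offset. A standalone sketch of that unpacking (helper names are illustrative):

#include <array>
#include <cstdint>
#include <cstdio>

// Sign-extend the 4-bit field starting at bit `lsb` of `packed`.
int32_t SignExtend4(uint32_t packed, unsigned lsb) {
    const int32_t field = static_cast<int32_t>((packed >> lsb) & 0xFu);
    return field >= 8 ? field - 16 : field;
}

// x lives in bits [0, 4), y in [4, 8), z in [8, 12), mirroring the 1D/2D/3D cases.
std::array<int32_t, 3> DecodeAoffi(uint32_t packed) {
    return {SignExtend4(packed, 0), SignExtend4(packed, 4), SignExtend4(packed, 8)};
}

int main() {
    const auto offsets = DecodeAoffi(0x1F8u); // x = -8, y = -1, z = +1
    std::printf("%d %d %d\n", offsets[0], offsets[1], offsets[2]);
    return 0;
}
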
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19constexpr unsigned R = 1;
20constexpr unsigned G = 2;
21constexpr unsigned B = 4;
22constexpr unsigned A = 8;
23
24constexpr std::array RG_LUT{
25 R, //
26 G, //
27 B, //
28 A, //
29 R | G, //
30 R | A, //
31 G | A, //
32 B | A, //
33};
34
35constexpr std::array RGBA_LUT{
36 R | G | B, //
37 R | G | A, //
38 R | B | A, //
39 G | B | A, //
40 R | G | B | A, //
41};
42
43union Encoding {
44 u64 raw;
45 BitField<59, 1, Precision> precision;
46 BitField<54, 1, u64> aoffi;
47 BitField<53, 1, u64> lod;
48 BitField<55, 1, u64> ms;
49 BitField<49, 1, u64> nodep;
50 BitField<28, 8, IR::Reg> dest_reg_b;
51 BitField<0, 8, IR::Reg> dest_reg_a;
52 BitField<8, 8, IR::Reg> src_reg_a;
53 BitField<20, 8, IR::Reg> src_reg_b;
54 BitField<36, 13, u64> cbuf_offset;
55 BitField<50, 3, u64> swizzle;
56 BitField<53, 4, u64> encoding;
57};
58
59void CheckAlignment(IR::Reg reg, size_t alignment) {
60 if (!IR::IsAligned(reg, alignment)) {
61 throw NotImplementedException("Unaligned source register {}", reg);
62 }
63}
64
65IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
66 const IR::U32 value{v.X(reg)};
67 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
68 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding tlds{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
74 const IR::Reg reg_a{tlds.src_reg_a};
75 const IR::Reg reg_b{tlds.src_reg_b};
76 IR::Value coords;
77 IR::U32 lod{v.ir.Imm32(0U)};
78 IR::Value offsets;
79 IR::U32 multisample;
80 Shader::TextureType texture_type{};
81 switch (tlds.encoding) {
82 case 0:
83 texture_type = Shader::TextureType::Color1D;
84 coords = v.X(reg_a);
85 break;
86 case 1:
87 texture_type = Shader::TextureType::Color1D;
88 coords = v.X(reg_a);
89 lod = v.X(reg_b);
90 break;
91 case 2:
92 texture_type = Shader::TextureType::Color2D;
93 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
94 break;
95 case 4:
96 CheckAlignment(reg_a, 2);
97 texture_type = Shader::TextureType::Color2D;
98 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
99 offsets = MakeOffset(v, reg_b);
100 break;
101 case 5:
102 CheckAlignment(reg_a, 2);
103 texture_type = Shader::TextureType::Color2D;
104 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
105 lod = v.X(reg_b);
106 break;
107 case 6:
108 CheckAlignment(reg_a, 2);
109 texture_type = Shader::TextureType::Color2D;
110 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
111 multisample = v.X(reg_b);
112 break;
113 case 7:
114 CheckAlignment(reg_a, 2);
115 texture_type = Shader::TextureType::Color3D;
116 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
117 break;
118 case 8: {
119 CheckAlignment(reg_b, 2);
120 const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
121 texture_type = Shader::TextureType::ColorArray2D;
122 coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
123 break;
124 }
125 case 12:
126 CheckAlignment(reg_a, 2);
127 CheckAlignment(reg_b, 2);
128 texture_type = Shader::TextureType::Color2D;
129 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
130 lod = v.X(reg_b);
131 offsets = MakeOffset(v, reg_b + 1);
132 break;
133 default:
134 throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
135 }
136 IR::TextureInstInfo info{};
137 if (tlds.precision == Precision::F16) {
138 info.relaxed_precision.Assign(1);
139 }
140 info.type.Assign(texture_type);
141 return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
142}
143
144unsigned Swizzle(u64 insn) {
145 const Encoding tlds{insn};
146 const size_t encoding{tlds.swizzle};
147 if (tlds.dest_reg_b == IR::Reg::RZ) {
148 if (encoding >= RG_LUT.size()) {
149 throw NotImplementedException("Illegal RG encoding {}", encoding);
150 }
151 return RG_LUT[encoding];
152 } else {
153 if (encoding >= RGBA_LUT.size()) {
154 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
155 }
156 return RGBA_LUT[encoding];
157 }
158}
159
160IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
161 return IR::F32{v.ir.CompositeExtract(sample, component)};
162}
163
164IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
165 const Encoding tlds{insn};
166 switch (index) {
167 case 0:
168 return tlds.dest_reg_a;
169 case 1:
170 CheckAlignment(tlds.dest_reg_a, 2);
171 return tlds.dest_reg_a + 1;
172 case 2:
173 return tlds.dest_reg_b;
174 case 3:
175 CheckAlignment(tlds.dest_reg_b, 2);
176 return tlds.dest_reg_b + 1;
177 }
178 throw LogicError("Invalid store index {}", index);
179}
180
181void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
182 const unsigned swizzle{Swizzle(insn)};
183 unsigned store_index{0};
184 for (unsigned component = 0; component < 4; ++component) {
185 if (((swizzle >> component) & 1) == 0) {
186 continue;
187 }
188 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
189 v.F(dest, Extract(v, sample, component));
190 ++store_index;
191 }
192}
193
194IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
195 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
196}
197
198void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
199 const unsigned swizzle{Swizzle(insn)};
200 unsigned store_index{0};
201 std::array<IR::F32, 4> swizzled;
202 for (unsigned component = 0; component < 4; ++component) {
203 if (((swizzle >> component) & 1) == 0) {
204 continue;
205 }
206 swizzled[store_index] = Extract(v, sample, component);
207 ++store_index;
208 }
209 const IR::F32 zero{v.ir.Imm32(0.0f)};
210 const Encoding tlds{insn};
211 switch (store_index) {
212 case 1:
213 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
214 break;
215 case 2:
216 case 3:
217 case 4:
218 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
219 switch (store_index) {
220 case 2:
221 break;
222 case 3:
223 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
224 break;
225 case 4:
226 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
227 break;
228 }
229 break;
230 }
231}
232} // Anonymous namespace
233
234void TranslatorVisitor::TLDS(u64 insn) {
235 const IR::Value sample{Sample(*this, insn)};
236 if (Encoding{insn}.precision == Precision::F32) {
237 Store32(*this, insn, sample);
238 } else {
239 Store16(*this, insn, sample);
240 }
241}
242} // namespace Shader::Maxwell
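
The swizzle decoding above selects one of two LUTs depending on whether the second destination register is RZ: with RZ at most two components are written. A compact standalone reference of that selection, mirroring the tables in the file:

#include <array>
#include <cstdio>
#include <stdexcept>

constexpr unsigned R = 1, G = 2, B = 4, A = 8;
constexpr std::array RG_LUT{R, G, B, A, R | G, R | A, G | A, B | A};
constexpr std::array RGBA_LUT{R | G | B, R | G | A, R | B | A, G | B | A, R | G | B | A};

unsigned DecodeSwizzle(unsigned encoding, bool dest_b_is_rz) {
    if (dest_b_is_rz) {
        if (encoding >= RG_LUT.size()) {
            throw std::out_of_range("RG swizzle");
        }
        return RG_LUT[encoding];
    }
    if (encoding >= RGBA_LUT.size()) {
        throw std::out_of_range("RGBA swizzle");
    }
    return RGBA_LUT[encoding];
}

int main() {
    std::printf("%u\n", DecodeSwizzle(4, true)); // R | G == 3
    return 0;
}
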
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49    // The ISA reads an array component here, but it is not needed in high-level shading languages,
50    // so we drop this information.
51 switch (type) {
52 case TextureType::_1D:
53 return v.F(reg);
54 case TextureType::ARRAY_1D:
55 return v.F(reg + 1);
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
73 union {
74 u64 raw;
75 BitField<49, 1, u64> nodep;
76 BitField<35, 1, u64> ndv;
77 BitField<0, 8, IR::Reg> dest_reg;
78 BitField<8, 8, IR::Reg> coord_reg;
79 BitField<20, 8, IR::Reg> meta_reg;
80 BitField<28, 3, TextureType> type;
81 BitField<31, 4, u64> mask;
82 BitField<36, 13, u64> cbuf_offset;
83 } const tmml{insn};
84
85 if ((tmml.mask & 0b1100) != 0) {
86        throw NotImplementedException("TMML BA results are not implemented");
87 }
88 const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
89
90 IR::U32 handle;
91 IR::Reg meta_reg{tmml.meta_reg};
92 if (is_bindless) {
93 handle = v.X(meta_reg++);
94 } else {
95 handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
96 }
97 IR::TextureInstInfo info{};
98 info.type.Assign(GetType(tmml.type));
99 const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
100
101 IR::Reg dest_reg{tmml.dest_reg};
102 for (size_t element = 0; element < 4; ++element) {
103 if (((tmml.mask >> element) & 1) == 0) {
104 continue;
105 }
106 IR::F32 value{v.ir.CompositeExtract(sample, element)};
107 if (element < 2) {
108 IR::U32 casted_value;
109 if (element == 0) {
110 casted_value = v.ir.ConvertFToU(32, value);
111 } else {
112 casted_value = v.ir.ConvertFToS(16, value);
113 }
114 v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
115 } else {
116 v.F(dest_reg, value);
117 }
118 ++dest_reg;
119 }
120}
121} // Anonymous namespace
122
123void TranslatorVisitor::TMML(u64 insn) {
124 Impl(*this, insn, false);
125}
126
127void TranslatorVisitor::TMML_b(u64 insn) {
128 Impl(*this, insn, true);
129}
130
131} // namespace Shader::Maxwell
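
TMML writes its first two result components as integers scaled by 256 (the ShiftLeftLogical by 8 above), i.e. an x.8 fixed-point layout in the destination registers. A small sketch of that encoding, assuming the query result arrives as a plain float (function names are illustrative):

#include <cstdint>
#include <cstdio>

// Component 0: unsigned conversion of the LOD, scaled by 2^8.
uint32_t EncodeLodUnsignedFixed8(float lod) {
    return static_cast<uint32_t>(lod) << 8;
}

// Component 1: signed 16-bit conversion of the LOD, scaled by 2^8
// (multiplication is equivalent to the shift in the translator).
uint32_t EncodeLodSignedFixed8(float lod) {
    const int32_t fixed = static_cast<int32_t>(static_cast<int16_t>(lod)) * 256;
    return static_cast<uint32_t>(fixed);
}

int main() {
    std::printf("0x%x 0x%x\n", EncodeLodUnsignedFixed8(3.0f), EncodeLodSignedFixed8(-2.0f));
    return 0;
}
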
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Mode : u64 {
15 Dimension = 1,
16 TextureType = 2,
17 SamplePos = 5,
18};
19
20IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
21 switch (mode) {
22 case Mode::Dimension: {
23 const IR::U32 lod{v.X(src_reg)};
24 return v.ir.ImageQueryDimension(handle, lod);
25 }
26 case Mode::TextureType:
27 case Mode::SamplePos:
28 default:
29 throw NotImplementedException("Mode {}", mode);
30 }
31}
32
33void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
34 union {
35 u64 raw;
36 BitField<49, 1, u64> nodep;
37 BitField<0, 8, IR::Reg> dest_reg;
38 BitField<8, 8, IR::Reg> src_reg;
39 BitField<22, 3, Mode> mode;
40 BitField<31, 4, u64> mask;
41 } const txq{insn};
42
43 IR::Reg src_reg{txq.src_reg};
44 IR::U32 handle;
45 if (cbuf_offset) {
46 handle = v.ir.Imm32(*cbuf_offset);
47 } else {
48 handle = v.X(src_reg);
49 ++src_reg;
50 }
51 const IR::Value query{Query(v, handle, txq.mode, src_reg)};
52 IR::Reg dest_reg{txq.dest_reg};
53 for (int element = 0; element < 4; ++element) {
54 if (((txq.mask >> element) & 1) == 0) {
55 continue;
56 }
57 v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
58 ++dest_reg;
59 }
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::TXQ(u64 insn) {
64 union {
65 u64 raw;
66 BitField<36, 13, u64> cbuf_offset;
67 } const txq{insn};
68
69 Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
70}
71
72void TranslatorVisitor::TXQ_b(u64 insn) {
73 Impl(*this, insn, std::nullopt);
74}
75
76} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
7
8namespace Shader::Maxwell {
9
10IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
11 u32 selector, bool is_signed) {
12 switch (width) {
13 case VideoWidth::Byte:
14 case VideoWidth::Unknown:
15 return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
16 case VideoWidth::Short:
17 return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
18 case VideoWidth::Word:
19 return value;
20 default:
21 throw NotImplementedException("Unknown VideoWidth {}", width);
22 }
23}
24
25VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
26    // Immediates must be in 16-bit format.
27 return is_immediate ? VideoWidth::Short : width;
28}
29
30} // namespace Shader::Maxwell
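
ExtractVideoOperandValue above picks a byte or halfword lane out of a 32-bit operand and optionally sign-extends it; Word returns the operand unchanged and Unknown falls back to byte extraction. A host-side sketch of the same lane extraction (a reference, not the emitter API):

#include <cstdint>
#include <cstdio>

enum class VideoWidth { Byte, Unknown, Short, Word };

int64_t ExtractLane(uint32_t value, VideoWidth width, unsigned selector, bool is_signed) {
    switch (width) {
    case VideoWidth::Byte:
    case VideoWidth::Unknown: {
        const uint32_t lane = (value >> (selector * 8)) & 0xFFu;
        return is_signed ? static_cast<int8_t>(lane) : static_cast<int64_t>(lane);
    }
    case VideoWidth::Short: {
        const uint32_t lane = (value >> (selector * 16)) & 0xFFFFu;
        return is_signed ? static_cast<int16_t>(lane) : static_cast<int64_t>(lane);
    }
    case VideoWidth::Word:
        return static_cast<int64_t>(value);
    }
    return 0;
}

int main() {
    // Byte lane 2 of 0x00FF0000 is 0xFF: 255 unsigned, -1 signed.
    std::printf("%lld %lld\n",
                static_cast<long long>(ExtractLane(0x00FF0000u, VideoWidth::Byte, 2, false)),
                static_cast<long long>(ExtractLane(0x00FF0000u, VideoWidth::Byte, 2, true)));
    return 0;
}
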
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11enum class VideoWidth : u64 {
12 Byte,
13 Unknown,
14 Short,
15 Word,
16};
17
18[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
19 VideoWidth width, u32 selector, bool is_signed);
20
21[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class VideoMinMaxOps : u64 {
13 MRG_16H,
14 MRG_16L,
15 MRG_8B0,
16 MRG_8B2,
17 ACC,
18 MIN,
19 MAX,
20};
21
22[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
23 VideoMinMaxOps op, bool is_signed) {
24 switch (op) {
25 case VideoMinMaxOps::MIN:
26 return ir.IMin(lhs, rhs, is_signed);
27 case VideoMinMaxOps::MAX:
28 return ir.IMax(lhs, rhs, is_signed);
29 default:
30 throw NotImplementedException("VMNMX op {}", op);
31 }
32}
33} // Anonymous namespace
34
35void TranslatorVisitor::VMNMX(u64 insn) {
36 union {
37 u64 raw;
38 BitField<0, 8, IR::Reg> dest_reg;
39 BitField<20, 16, u64> src_b_imm;
40 BitField<28, 2, u64> src_b_selector;
41 BitField<29, 2, VideoWidth> src_b_width;
42 BitField<36, 2, u64> src_a_selector;
43 BitField<37, 2, VideoWidth> src_a_width;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> src_a_sign;
46 BitField<49, 1, u64> src_b_sign;
47 BitField<50, 1, u64> is_src_b_reg;
48 BitField<51, 3, VideoMinMaxOps> op;
49 BitField<54, 1, u64> dest_sign;
50 BitField<55, 1, u64> sat;
51 BitField<56, 1, u64> mx;
52 } const vmnmx{insn};
53
54 if (vmnmx.cc != 0) {
55 throw NotImplementedException("VMNMX CC");
56 }
57 if (vmnmx.sat != 0) {
58 throw NotImplementedException("VMNMX SAT");
59 }
60 // Selectors were shown to default to 2 in unit tests
61 if (vmnmx.src_a_selector != 2) {
62 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
63 }
64 if (vmnmx.src_b_selector != 2) {
65 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
66 }
67 if (vmnmx.src_a_width != VideoWidth::Word) {
68 throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
69 }
70
71 const bool is_b_imm{vmnmx.is_src_b_reg == 0};
72 const IR::U32 src_a{GetReg8(insn)};
73 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
74 const IR::U32 src_c{GetReg39(insn)};
75
76 const VideoWidth a_width{vmnmx.src_a_width};
77 const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
78
79 const bool src_a_signed{vmnmx.src_a_sign != 0};
80 const bool src_b_signed{vmnmx.src_b_sign != 0};
81 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
82 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
83
84 // First operation's sign is only dependent on operand b's sign
85 const bool op_1_signed{src_b_signed};
86
87 const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
88 : ir.IMin(op_a, op_b, op_1_signed)};
89 X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
90}
91
92} // namespace Shader::Maxwell
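
VMNMX runs in two stages: a min or max between the two extracted operands (selected by the MX bit, with signedness taken from operand b), followed by a second operation against the third source using the destination sign. A simplified scalar reference that uses signed 64-bit values to sidestep the per-operand signedness flags (the MRG/ACC variants are still rejected by the translator above):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int64_t Vmnmx(int64_t op_a, int64_t op_b, int64_t src_c, bool mx, bool second_is_max) {
    // Stage 1: min or max of the extracted operands.
    const int64_t lhs = mx ? std::max(op_a, op_b) : std::min(op_a, op_b);
    // Stage 2: min or max against the third source.
    return second_is_max ? std::max(lhs, src_c) : std::min(lhs, src_c);
}

int main() {
    // max(-5, 7) = 7, then min(7, 2) = 2.
    std::printf("%lld\n", static_cast<long long>(Vmnmx(-5, 7, 2, true, false)));
    return 0;
}
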
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::VMAD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<20, 16, u64> src_b_imm;
16 BitField<28, 2, u64> src_b_selector;
17 BitField<29, 2, VideoWidth> src_b_width;
18 BitField<36, 2, u64> src_a_selector;
19 BitField<37, 2, VideoWidth> src_a_width;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> src_a_sign;
22 BitField<49, 1, u64> src_b_sign;
23 BitField<50, 1, u64> is_src_b_reg;
24 BitField<51, 2, u64> scale;
25 BitField<53, 1, u64> src_c_neg;
26 BitField<54, 1, u64> src_a_neg;
27 BitField<55, 1, u64> sat;
28 } const vmad{insn};
29
30 if (vmad.cc != 0) {
31 throw NotImplementedException("VMAD CC");
32 }
33 if (vmad.sat != 0) {
34 throw NotImplementedException("VMAD SAT");
35 }
36 if (vmad.scale != 0) {
37 throw NotImplementedException("VMAD SCALE");
38 }
39 if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
40 throw NotImplementedException("VMAD PO");
41 }
42 if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
43 throw NotImplementedException("VMAD NEG");
44 }
45 const bool is_b_imm{vmad.is_src_b_reg == 0};
46 const IR::U32 src_a{GetReg8(insn)};
47 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
48 const IR::U32 src_c{GetReg39(insn)};
49
50 const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
51 // Immediate values can't have a selector
52 const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
53 const VideoWidth a_width{vmad.src_a_width};
54 const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
55
56 const bool src_a_signed{vmad.src_a_sign != 0};
57 const bool src_b_signed{vmad.src_b_sign != 0};
58 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
59 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
60
61 X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class VsetpCompareOp : u64 {
14 False = 0,
15 LessThan,
16 Equal,
17 LessThanEqual,
18 GreaterThan = 16,
19 NotEqual,
20 GreaterThanEqual,
21 True,
22};
23
24CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
25 switch (op) {
26 case VsetpCompareOp::False:
27 return CompareOp::False;
28 case VsetpCompareOp::LessThan:
29 return CompareOp::LessThan;
30 case VsetpCompareOp::Equal:
31 return CompareOp::Equal;
32 case VsetpCompareOp::LessThanEqual:
33 return CompareOp::LessThanEqual;
34 case VsetpCompareOp::GreaterThan:
35 return CompareOp::GreaterThan;
36 case VsetpCompareOp::NotEqual:
37 return CompareOp::NotEqual;
38 case VsetpCompareOp::GreaterThanEqual:
39 return CompareOp::GreaterThanEqual;
40 case VsetpCompareOp::True:
41 return CompareOp::True;
42 default:
43 throw NotImplementedException("Invalid compare op {}", op);
44 }
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::VSETP(u64 insn) {
49 union {
50 u64 raw;
51 BitField<0, 3, IR::Pred> dest_pred_b;
52 BitField<3, 3, IR::Pred> dest_pred_a;
53 BitField<20, 16, u64> src_b_imm;
54 BitField<28, 2, u64> src_b_selector;
55 BitField<29, 2, VideoWidth> src_b_width;
56 BitField<36, 2, u64> src_a_selector;
57 BitField<37, 2, VideoWidth> src_a_width;
58 BitField<39, 3, IR::Pred> bop_pred;
59 BitField<42, 1, u64> neg_bop_pred;
60 BitField<43, 5, VsetpCompareOp> compare_op;
61 BitField<45, 2, BooleanOp> bop;
62 BitField<48, 1, u64> src_a_sign;
63 BitField<49, 1, u64> src_b_sign;
64 BitField<50, 1, u64> is_src_b_reg;
65 } const vsetp{insn};
66
67 const bool is_b_imm{vsetp.is_src_b_reg == 0};
68 const IR::U32 src_a{GetReg8(insn)};
69 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
70
71 const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
72 const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
73 const VideoWidth a_width{vsetp.src_a_width};
74 const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
75
76 const bool src_a_signed{vsetp.src_a_sign != 0};
77 const bool src_b_signed{vsetp.src_b_sign != 0};
78 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
79 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
80
81 // Compare operation's sign is only dependent on operand b's sign
82 const bool compare_signed{src_b_signed};
83 const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
84 const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
85 const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
86 const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
87 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
88 ir.SetPred(vsetp.dest_pred_a, result_a);
89 ir.SetPred(vsetp.dest_pred_b, result_b);
90}
91
92} // namespace Shader::Maxwell
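
VSETP derives two destination predicates from a single comparison: the first combines the comparison result with the auxiliary predicate, the second combines the negated comparison with the same predicate. A boolean sketch of that combination (the BooleanOp values here are illustrative; the translator's PredicateCombine lives in common_funcs.h):

#include <cstdio>

enum class BooleanOp { And, Or, Xor };

bool Combine(bool lhs, bool rhs, BooleanOp op) {
    switch (op) {
    case BooleanOp::And:
        return lhs && rhs;
    case BooleanOp::Or:
        return lhs || rhs;
    case BooleanOp::Xor:
        return lhs != rhs;
    }
    return false;
}

int main() {
    const bool comparison = true; // result of IntegerCompare
    const bool bop_pred = true;   // auxiliary predicate
    const bool result_a = Combine(comparison, bop_pred, BooleanOp::And);
    const bool result_b = Combine(!comparison, bop_pred, BooleanOp::And);
    std::printf("%d %d\n", result_a, result_b);
    return 0;
}
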
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class VoteOp : u64 {
12 ALL,
13 ANY,
14 EQ,
15};
16
17[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
18 switch (vote_op) {
19 case VoteOp::ALL:
20 return ir.VoteAll(pred);
21 case VoteOp::ANY:
22 return ir.VoteAny(pred);
23 case VoteOp::EQ:
24 return ir.VoteEqual(pred);
25 default:
26 throw NotImplementedException("Invalid VOTE op {}", vote_op);
27 }
28}
29
30void Vote(TranslatorVisitor& v, u64 insn) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<39, 3, IR::Pred> pred_a;
35 BitField<42, 1, u64> neg_pred_a;
36 BitField<45, 3, IR::Pred> pred_b;
37 BitField<48, 2, VoteOp> vote_op;
38 } const vote{insn};
39
40 const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
41 v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
42 v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::VOTE(u64 insn) {
47 Vote(*this, insn);
48}
49
50void TranslatorVisitor::VOTE_vtg(u64) {
51 LOG_WARNING(Shader, "(STUBBED) called");
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class ShuffleMode : u64 {
14 IDX,
15 UP,
16 DOWN,
17 BFLY,
18};
19
20[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
21 const IR::U32& index, const IR::U32& mask,
22 ShuffleMode shfl_op) {
23 const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
24 const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
25 switch (shfl_op) {
26 case ShuffleMode::IDX:
27 return ir.ShuffleIndex(value, index, clamp, seg_mask);
28 case ShuffleMode::UP:
29 return ir.ShuffleUp(value, index, clamp, seg_mask);
30 case ShuffleMode::DOWN:
31 return ir.ShuffleDown(value, index, clamp, seg_mask);
32 case ShuffleMode::BFLY:
33 return ir.ShuffleButterfly(value, index, clamp, seg_mask);
34 default:
35 throw NotImplementedException("Invalid SHFL op {}", shfl_op);
36 }
37}
38
39void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
40 union {
41 u64 insn;
42 BitField<0, 8, IR::Reg> dest_reg;
43 BitField<8, 8, IR::Reg> src_reg;
44 BitField<30, 2, ShuffleMode> mode;
45 BitField<48, 3, IR::Pred> pred;
46 } const shfl{insn};
47
48 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
49 v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
50 v.X(shfl.dest_reg, result);
51}
52} // Anonymous namespace
53
54void TranslatorVisitor::SHFL(u64 insn) {
55 union {
56 u64 insn;
57 BitField<20, 5, u64> src_a_imm;
58 BitField<28, 1, u64> src_a_flag;
59 BitField<29, 1, u64> src_b_flag;
60 BitField<34, 13, u64> src_b_imm;
61 } const flags{insn};
62 const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
63 : GetReg20(insn)};
64 const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
65 : GetReg39(insn)};
66 Shuffle(*this, insn, src_a, src_b);
67}
68
69} // namespace Shader::Maxwell
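
All SHFL modes share the same mask register layout: a lane clamp in bits [0, 5) and a segmentation mask in bits [8, 13), as extracted above. A standalone decode sketch:

#include <cstdint>
#include <cstdio>

struct ShuffleMask {
    uint32_t clamp;    // bits [0, 5)
    uint32_t seg_mask; // bits [8, 13)
};

ShuffleMask DecodeShuffleMask(uint32_t mask) {
    return {
        .clamp = mask & 0x1Fu,
        .seg_mask = (mask >> 8) & 0x1Fu,
    };
}

int main() {
    const ShuffleMask decoded = DecodeShuffleMask(0x1F1Fu);
    std::printf("clamp=%u seg_mask=%u\n", decoded.clamp, decoded.seg_mask);
    return 0;
}
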
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/environment.h"
6#include "shader_recompiler/frontend/ir/basic_block.h"
7#include "shader_recompiler/frontend/maxwell/decode.h"
8#include "shader_recompiler/frontend/maxwell/location.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10#include "shader_recompiler/frontend/maxwell/translate/translate.h"
11
12namespace Shader::Maxwell {
13
14template <auto method>
15static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
16 using MethodType = decltype(method);
17 if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
18 (visitor.*method)(pc, insn);
19 } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
20 (visitor.*method)(insn);
21 } else {
22 (visitor.*method)();
23 }
24}
25
26void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
27 if (location_begin == location_end) {
28 return;
29 }
30 TranslatorVisitor visitor{env, *block};
31 for (Location pc = location_begin; pc != location_end; ++pc) {
32 const u64 insn{env.ReadInstruction(pc.Offset())};
33 try {
34 const Opcode opcode{Decode(insn)};
35 switch (opcode) {
36#define INST(name, cute, mask) \
37 case Opcode::name: \
38 Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
39 break;
40#include "shader_recompiler/frontend/maxwell/maxwell.inc"
41#undef OPCODE
42 default:
43 throw LogicError("Invalid opcode {}", opcode);
44 }
45 } catch (Exception& exception) {
46 exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
47 throw;
48 }
49 }
50}
51
52} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
10namespace Shader::Maxwell {
11
12void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
13
14} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <vector>
8
9#include "common/settings.h"
10#include "shader_recompiler/exception.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/post_order.h"
13#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
14#include "shader_recompiler/frontend/maxwell/translate/translate.h"
15#include "shader_recompiler/frontend/maxwell/translate_program.h"
16#include "shader_recompiler/host_translate_info.h"
17#include "shader_recompiler/ir_opt/passes.h"
18
19namespace Shader::Maxwell {
20namespace {
21IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
22 size_t num_syntax_blocks{};
23 for (const auto& node : syntax_list) {
24 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
25 ++num_syntax_blocks;
26 }
27 }
28 IR::BlockList blocks;
29 blocks.reserve(num_syntax_blocks);
30 for (const auto& node : syntax_list) {
31 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
32 blocks.push_back(node.data.block);
33 }
34 }
35 return blocks;
36}
37
38void RemoveUnreachableBlocks(IR::Program& program) {
39    // Some blocks might be unreachable if a function call exists unconditionally.
40    // If this happens, the number of blocks and post-order blocks will mismatch.
41 if (program.blocks.size() == program.post_order_blocks.size()) {
42 return;
43 }
44 const auto begin{program.blocks.begin() + 1};
45 const auto end{program.blocks.end()};
46 const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
47 program.blocks.erase(std::remove_if(begin, end, pred), end);
48}
49
50void CollectInterpolationInfo(Environment& env, IR::Program& program) {
51 if (program.stage != Stage::Fragment) {
52 return;
53 }
54 const ProgramHeader& sph{env.SPH()};
55 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
56 std::optional<PixelImap> imap;
57 for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
58 if (value == PixelImap::Unused) {
59 continue;
60 }
61 if (imap && imap != value) {
62 throw NotImplementedException("Per component interpolation");
63 }
64 imap = value;
65 }
66 if (!imap) {
67 continue;
68 }
69 program.info.interpolation[index] = [&] {
70 switch (*imap) {
71 case PixelImap::Unused:
72 case PixelImap::Perspective:
73 return Interpolation::Smooth;
74 case PixelImap::Constant:
75 return Interpolation::Flat;
76 case PixelImap::ScreenLinear:
77 return Interpolation::NoPerspective;
78 }
79 throw NotImplementedException("Unknown interpolation {}", *imap);
80 }();
81 }
82}
83
84void AddNVNStorageBuffers(IR::Program& program) {
85 if (!program.info.uses_global_memory) {
86 return;
87 }
88 const u32 driver_cbuf{0};
89 const u32 descriptor_size{0x10};
90 const u32 num_buffers{16};
91 const u32 base{[&] {
92 switch (program.stage) {
93 case Stage::VertexA:
94 case Stage::VertexB:
95 return 0x110u;
96 case Stage::TessellationControl:
97 return 0x210u;
98 case Stage::TessellationEval:
99 return 0x310u;
100 case Stage::Geometry:
101 return 0x410u;
102 case Stage::Fragment:
103 return 0x510u;
104 case Stage::Compute:
105 return 0x310u;
106 }
107 throw InvalidArgument("Invalid stage {}", program.stage);
108 }()};
109 auto& descs{program.info.storage_buffers_descriptors};
110 for (u32 index = 0; index < num_buffers; ++index) {
111 if (!program.info.nvn_buffer_used[index]) {
112 continue;
113 }
114 const u32 offset{base + index * descriptor_size};
115 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
116 if (it != descs.end()) {
117 it->is_written |= program.info.stores_global_memory;
118 continue;
119 }
120 descs.push_back({
121 .cbuf_index = driver_cbuf,
122 .cbuf_offset = offset,
123 .count = 1,
124 .is_written = program.info.stores_global_memory,
125 });
126 }
127}
128} // Anonymous namespace
129
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage();
137 program.local_memory_size = env.LocalMemorySize();
138 switch (program.stage) {
139 case Stage::TessellationControl: {
140 const ProgramHeader& sph{env.SPH()};
141 program.invocations = sph.common2.threads_per_input_primitive;
142 break;
143 }
144 case Stage::Geometry: {
145 const ProgramHeader& sph{env.SPH()};
146 program.output_topology = sph.common3.output_topology;
147 program.output_vertices = sph.common4.max_output_vertices;
148 program.invocations = sph.common2.threads_per_input_primitive;
149 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
150 if (program.is_geometry_passthrough) {
151 const auto& mask{env.GpPassthroughMask()};
152 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
153 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
154 }
155 }
156 break;
157 }
158 case Stage::Compute:
159 program.workgroup_size = env.WorkgroupSize();
160 program.shared_memory_size = env.SharedMemorySize();
161 break;
162 default:
163 break;
164 }
165 RemoveUnreachableBlocks(program);
166
167 // Replace instructions before the SSA rewrite
168 if (!host_info.support_float16) {
169 Optimization::LowerFp16ToFp32(program);
170 }
171 if (!host_info.support_int64) {
172 Optimization::LowerInt64ToInt32(program);
173 }
174 Optimization::SsaRewritePass(program);
175
176 Optimization::GlobalMemoryToStorageBufferPass(program);
177 Optimization::TexturePass(env, program);
178
179 Optimization::ConstantPropagationPass(program);
180 Optimization::DeadCodeEliminationPass(program);
181 if (Settings::values.renderer_debug) {
182 Optimization::VerificationPass(program);
183 }
184 Optimization::CollectShaderInfoPass(env, program);
185 CollectInterpolationInfo(env, program);
186 AddNVNStorageBuffers(program);
187 return program;
188}
189
190IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
191 Environment& env_vertex_b) {
192 IR::Program result{};
193 Optimization::VertexATransformPass(vertex_a);
194 Optimization::VertexBTransformPass(vertex_b);
195 for (const auto& term : vertex_a.syntax_list) {
196 if (term.type != IR::AbstractSyntaxNode::Type::Return) {
197 result.syntax_list.push_back(term);
198 }
199 }
200 result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
201 vertex_b.syntax_list.end());
202 result.blocks = GenerateBlocks(result.syntax_list);
203 result.post_order_blocks = vertex_b.post_order_blocks;
204 for (const auto& block : vertex_a.post_order_blocks) {
205 result.post_order_blocks.push_back(block);
206 }
207 result.stage = Stage::VertexB;
208 result.info = vertex_a.info;
209 result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
210 result.info.loads.mask |= vertex_b.info.loads.mask;
211 result.info.stores.mask |= vertex_b.info.stores.mask;
212
213 Optimization::JoinTextureInfo(result.info, vertex_b.info);
214 Optimization::JoinStorageInfo(result.info, vertex_b.info);
215 Optimization::DeadCodeEliminationPass(result);
216 if (Settings::values.renderer_debug) {
217 Optimization::VerificationPass(result);
218 }
219 Optimization::CollectShaderInfoPass(env_vertex_b, result);
220 return result;
221}
222
223} // namespace Shader::Maxwell
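
AddNVNStorageBuffers above synthesizes storage-buffer descriptors from fixed slots in driver constant buffer 0: each of the 16 descriptors is 0x10 bytes, starting at a per-stage base offset. A small sketch of that offset computation (the fragment-stage base is taken from the switch above):

#include <cstdint>
#include <cstdio>

constexpr uint32_t DescriptorSize = 0x10;

// Offset of descriptor `index` within constant buffer 0 for a given stage base.
uint32_t NvnDescriptorOffset(uint32_t stage_base, uint32_t index) {
    return stage_base + index * DescriptorSize;
}

int main() {
    // Fragment stage base is 0x510, so storage buffer 3's descriptor is at 0x540.
    std::printf("0x%x\n", NvnDescriptorOffset(0x510u, 3));
    return 0;
}
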
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/host_translate_info.h"
12#include "shader_recompiler/object_pool.h"
13
14namespace Shader::Maxwell {
15
16[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg, const HostTranslateInfo& host_info);
19
20[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
21 Environment& env_vertex_b);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
new file mode 100644
index 000000000..94a584219
--- /dev/null
+++ b/src/shader_recompiler/host_translate_info.h
@@ -0,0 +1,18 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Shader {
8
9// Try to keep entries here to a minimum
10// They can accidentally change the cached information in a shader
11
12/// Misc information about the host
13struct HostTranslateInfo {
14 bool support_float16{}; ///< True when the device supports 16-bit floats
15 bool support_int64{}; ///< True when the device supports 64-bit integers
16};
17
18} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..5ead930f1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,928 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/alignment.h"
6#include "shader_recompiler/environment.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/ir/program.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/ir_opt/passes.h"
11#include "shader_recompiler/shader_info.h"
12
13namespace Shader::Optimization {
14namespace {
15void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
16 if (count != 1) {
17 throw NotImplementedException("Constant buffer descriptor indexing");
18 }
19 if ((info.constant_buffer_mask & (1U << index)) != 0) {
20 return;
21 }
22 info.constant_buffer_mask |= 1U << index;
23
24 auto& cbufs{info.constant_buffer_descriptors};
25 cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
26 ConstantBufferDescriptor{
27 .index = index,
28 .count = 1,
29 });
30}
31
32void GetPatch(Info& info, IR::Patch patch) {
33 if (!IR::IsGeneric(patch)) {
34 throw NotImplementedException("Reading non-generic patch {}", patch);
35 }
36 info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
37}
38
39void SetPatch(Info& info, IR::Patch patch) {
40 if (IR::IsGeneric(patch)) {
41 info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
42 return;
43 }
44 switch (patch) {
45 case IR::Patch::TessellationLodLeft:
46 case IR::Patch::TessellationLodTop:
47 case IR::Patch::TessellationLodRight:
48 case IR::Patch::TessellationLodBottom:
49 info.stores_tess_level_outer = true;
50 break;
51 case IR::Patch::TessellationLodInteriorU:
52 case IR::Patch::TessellationLodInteriorV:
53 info.stores_tess_level_inner = true;
54 break;
55 default:
56 throw NotImplementedException("Set patch {}", patch);
57 }
58}
59
60void CheckCBufNVN(Info& info, IR::Inst& inst) {
61 const IR::Value cbuf_index{inst.Arg(0)};
62 if (!cbuf_index.IsImmediate()) {
63 info.nvn_buffer_used.set();
64 return;
65 }
66 const u32 index{cbuf_index.U32()};
67 if (index != 0) {
68 return;
69 }
70 const IR::Value cbuf_offset{inst.Arg(1)};
71 if (!cbuf_offset.IsImmediate()) {
72 info.nvn_buffer_used.set();
73 return;
74 }
75 const u32 offset{cbuf_offset.U32()};
76 const u32 descriptor_size{0x10};
77 const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
78 if (offset >= info.nvn_buffer_base && offset < upper_limit) {
79 const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
80 info.nvn_buffer_used.set(nvn_index, true);
81 }
82}
83
84void VisitUsages(Info& info, IR::Inst& inst) {
85 switch (inst.GetOpcode()) {
86 case IR::Opcode::CompositeConstructF16x2:
87 case IR::Opcode::CompositeConstructF16x3:
88 case IR::Opcode::CompositeConstructF16x4:
89 case IR::Opcode::CompositeExtractF16x2:
90 case IR::Opcode::CompositeExtractF16x3:
91 case IR::Opcode::CompositeExtractF16x4:
92 case IR::Opcode::CompositeInsertF16x2:
93 case IR::Opcode::CompositeInsertF16x3:
94 case IR::Opcode::CompositeInsertF16x4:
95 case IR::Opcode::SelectF16:
96 case IR::Opcode::BitCastU16F16:
97 case IR::Opcode::BitCastF16U16:
98 case IR::Opcode::PackFloat2x16:
99 case IR::Opcode::UnpackFloat2x16:
100 case IR::Opcode::ConvertS16F16:
101 case IR::Opcode::ConvertS32F16:
102 case IR::Opcode::ConvertS64F16:
103 case IR::Opcode::ConvertU16F16:
104 case IR::Opcode::ConvertU32F16:
105 case IR::Opcode::ConvertU64F16:
106 case IR::Opcode::ConvertF16S8:
107 case IR::Opcode::ConvertF16S16:
108 case IR::Opcode::ConvertF16S32:
109 case IR::Opcode::ConvertF16S64:
110 case IR::Opcode::ConvertF16U8:
111 case IR::Opcode::ConvertF16U16:
112 case IR::Opcode::ConvertF16U32:
113 case IR::Opcode::ConvertF16U64:
114 case IR::Opcode::FPAbs16:
115 case IR::Opcode::FPAdd16:
116 case IR::Opcode::FPCeil16:
117 case IR::Opcode::FPFloor16:
118 case IR::Opcode::FPFma16:
119 case IR::Opcode::FPMul16:
120 case IR::Opcode::FPNeg16:
121 case IR::Opcode::FPRoundEven16:
122 case IR::Opcode::FPSaturate16:
123 case IR::Opcode::FPClamp16:
124 case IR::Opcode::FPTrunc16:
125 case IR::Opcode::FPOrdEqual16:
126 case IR::Opcode::FPUnordEqual16:
127 case IR::Opcode::FPOrdNotEqual16:
128 case IR::Opcode::FPUnordNotEqual16:
129 case IR::Opcode::FPOrdLessThan16:
130 case IR::Opcode::FPUnordLessThan16:
131 case IR::Opcode::FPOrdGreaterThan16:
132 case IR::Opcode::FPUnordGreaterThan16:
133 case IR::Opcode::FPOrdLessThanEqual16:
134 case IR::Opcode::FPUnordLessThanEqual16:
135 case IR::Opcode::FPOrdGreaterThanEqual16:
136 case IR::Opcode::FPUnordGreaterThanEqual16:
137 case IR::Opcode::FPIsNan16:
138 case IR::Opcode::GlobalAtomicAddF16x2:
139 case IR::Opcode::GlobalAtomicMinF16x2:
140 case IR::Opcode::GlobalAtomicMaxF16x2:
141 case IR::Opcode::StorageAtomicAddF16x2:
142 case IR::Opcode::StorageAtomicMinF16x2:
143 case IR::Opcode::StorageAtomicMaxF16x2:
144 info.uses_fp16 = true;
145 break;
146 case IR::Opcode::CompositeConstructF64x2:
147 case IR::Opcode::CompositeConstructF64x3:
148 case IR::Opcode::CompositeConstructF64x4:
149 case IR::Opcode::CompositeExtractF64x2:
150 case IR::Opcode::CompositeExtractF64x3:
151 case IR::Opcode::CompositeExtractF64x4:
152 case IR::Opcode::CompositeInsertF64x2:
153 case IR::Opcode::CompositeInsertF64x3:
154 case IR::Opcode::CompositeInsertF64x4:
155 case IR::Opcode::SelectF64:
156 case IR::Opcode::BitCastU64F64:
157 case IR::Opcode::BitCastF64U64:
158 case IR::Opcode::PackDouble2x32:
159 case IR::Opcode::UnpackDouble2x32:
160 case IR::Opcode::FPAbs64:
161 case IR::Opcode::FPAdd64:
162 case IR::Opcode::FPCeil64:
163 case IR::Opcode::FPFloor64:
164 case IR::Opcode::FPFma64:
165 case IR::Opcode::FPMax64:
166 case IR::Opcode::FPMin64:
167 case IR::Opcode::FPMul64:
168 case IR::Opcode::FPNeg64:
169 case IR::Opcode::FPRecip64:
170 case IR::Opcode::FPRecipSqrt64:
171 case IR::Opcode::FPRoundEven64:
172 case IR::Opcode::FPSaturate64:
173 case IR::Opcode::FPClamp64:
174 case IR::Opcode::FPTrunc64:
175 case IR::Opcode::FPOrdEqual64:
176 case IR::Opcode::FPUnordEqual64:
177 case IR::Opcode::FPOrdNotEqual64:
178 case IR::Opcode::FPUnordNotEqual64:
179 case IR::Opcode::FPOrdLessThan64:
180 case IR::Opcode::FPUnordLessThan64:
181 case IR::Opcode::FPOrdGreaterThan64:
182 case IR::Opcode::FPUnordGreaterThan64:
183 case IR::Opcode::FPOrdLessThanEqual64:
184 case IR::Opcode::FPUnordLessThanEqual64:
185 case IR::Opcode::FPOrdGreaterThanEqual64:
186 case IR::Opcode::FPUnordGreaterThanEqual64:
187 case IR::Opcode::FPIsNan64:
188 case IR::Opcode::ConvertS16F64:
189 case IR::Opcode::ConvertS32F64:
190 case IR::Opcode::ConvertS64F64:
191 case IR::Opcode::ConvertU16F64:
192 case IR::Opcode::ConvertU32F64:
193 case IR::Opcode::ConvertU64F64:
194 case IR::Opcode::ConvertF32F64:
195 case IR::Opcode::ConvertF64F32:
196 case IR::Opcode::ConvertF64S8:
197 case IR::Opcode::ConvertF64S16:
198 case IR::Opcode::ConvertF64S32:
199 case IR::Opcode::ConvertF64S64:
200 case IR::Opcode::ConvertF64U8:
201 case IR::Opcode::ConvertF64U16:
202 case IR::Opcode::ConvertF64U32:
203 case IR::Opcode::ConvertF64U64:
204 info.uses_fp64 = true;
205 break;
206 default:
207 break;
208 }
209 switch (inst.GetOpcode()) {
210 case IR::Opcode::GetCbufU8:
211 case IR::Opcode::GetCbufS8:
212 case IR::Opcode::UndefU8:
213 case IR::Opcode::LoadGlobalU8:
214 case IR::Opcode::LoadGlobalS8:
215 case IR::Opcode::WriteGlobalU8:
216 case IR::Opcode::WriteGlobalS8:
217 case IR::Opcode::LoadStorageU8:
218 case IR::Opcode::LoadStorageS8:
219 case IR::Opcode::WriteStorageU8:
220 case IR::Opcode::WriteStorageS8:
221 case IR::Opcode::LoadSharedU8:
222 case IR::Opcode::LoadSharedS8:
223 case IR::Opcode::WriteSharedU8:
224 case IR::Opcode::SelectU8:
225 case IR::Opcode::ConvertF16S8:
226 case IR::Opcode::ConvertF16U8:
227 case IR::Opcode::ConvertF32S8:
228 case IR::Opcode::ConvertF32U8:
229 case IR::Opcode::ConvertF64S8:
230 case IR::Opcode::ConvertF64U8:
231 info.uses_int8 = true;
232 break;
233 default:
234 break;
235 }
236 switch (inst.GetOpcode()) {
237 case IR::Opcode::GetCbufU16:
238 case IR::Opcode::GetCbufS16:
239 case IR::Opcode::UndefU16:
240 case IR::Opcode::LoadGlobalU16:
241 case IR::Opcode::LoadGlobalS16:
242 case IR::Opcode::WriteGlobalU16:
243 case IR::Opcode::WriteGlobalS16:
244 case IR::Opcode::LoadStorageU16:
245 case IR::Opcode::LoadStorageS16:
246 case IR::Opcode::WriteStorageU16:
247 case IR::Opcode::WriteStorageS16:
248 case IR::Opcode::LoadSharedU16:
249 case IR::Opcode::LoadSharedS16:
250 case IR::Opcode::WriteSharedU16:
251 case IR::Opcode::SelectU16:
252 case IR::Opcode::BitCastU16F16:
253 case IR::Opcode::BitCastF16U16:
254 case IR::Opcode::ConvertS16F16:
255 case IR::Opcode::ConvertS16F32:
256 case IR::Opcode::ConvertS16F64:
257 case IR::Opcode::ConvertU16F16:
258 case IR::Opcode::ConvertU16F32:
259 case IR::Opcode::ConvertU16F64:
260 case IR::Opcode::ConvertF16S16:
261 case IR::Opcode::ConvertF16U16:
262 case IR::Opcode::ConvertF32S16:
263 case IR::Opcode::ConvertF32U16:
264 case IR::Opcode::ConvertF64S16:
265 case IR::Opcode::ConvertF64U16:
266 info.uses_int16 = true;
267 break;
268 default:
269 break;
270 }
271 switch (inst.GetOpcode()) {
272 case IR::Opcode::UndefU64:
273 case IR::Opcode::LoadGlobalU8:
274 case IR::Opcode::LoadGlobalS8:
275 case IR::Opcode::LoadGlobalU16:
276 case IR::Opcode::LoadGlobalS16:
277 case IR::Opcode::LoadGlobal32:
278 case IR::Opcode::LoadGlobal64:
279 case IR::Opcode::LoadGlobal128:
280 case IR::Opcode::WriteGlobalU8:
281 case IR::Opcode::WriteGlobalS8:
282 case IR::Opcode::WriteGlobalU16:
283 case IR::Opcode::WriteGlobalS16:
284 case IR::Opcode::WriteGlobal32:
285 case IR::Opcode::WriteGlobal64:
286 case IR::Opcode::WriteGlobal128:
287 case IR::Opcode::SelectU64:
288 case IR::Opcode::BitCastU64F64:
289 case IR::Opcode::BitCastF64U64:
290 case IR::Opcode::PackUint2x32:
291 case IR::Opcode::UnpackUint2x32:
292 case IR::Opcode::IAdd64:
293 case IR::Opcode::ISub64:
294 case IR::Opcode::INeg64:
295 case IR::Opcode::ShiftLeftLogical64:
296 case IR::Opcode::ShiftRightLogical64:
297 case IR::Opcode::ShiftRightArithmetic64:
298 case IR::Opcode::ConvertS64F16:
299 case IR::Opcode::ConvertS64F32:
300 case IR::Opcode::ConvertS64F64:
301 case IR::Opcode::ConvertU64F16:
302 case IR::Opcode::ConvertU64F32:
303 case IR::Opcode::ConvertU64F64:
304 case IR::Opcode::ConvertU64U32:
305 case IR::Opcode::ConvertU32U64:
306 case IR::Opcode::ConvertF16U64:
307 case IR::Opcode::ConvertF32U64:
308 case IR::Opcode::ConvertF64U64:
309 case IR::Opcode::SharedAtomicExchange64:
310 case IR::Opcode::GlobalAtomicIAdd64:
311 case IR::Opcode::GlobalAtomicSMin64:
312 case IR::Opcode::GlobalAtomicUMin64:
313 case IR::Opcode::GlobalAtomicSMax64:
314 case IR::Opcode::GlobalAtomicUMax64:
315 case IR::Opcode::GlobalAtomicAnd64:
316 case IR::Opcode::GlobalAtomicOr64:
317 case IR::Opcode::GlobalAtomicXor64:
318 case IR::Opcode::GlobalAtomicExchange64:
319 case IR::Opcode::StorageAtomicIAdd64:
320 case IR::Opcode::StorageAtomicSMin64:
321 case IR::Opcode::StorageAtomicUMin64:
322 case IR::Opcode::StorageAtomicSMax64:
323 case IR::Opcode::StorageAtomicUMax64:
324 case IR::Opcode::StorageAtomicAnd64:
325 case IR::Opcode::StorageAtomicOr64:
326 case IR::Opcode::StorageAtomicXor64:
327 case IR::Opcode::StorageAtomicExchange64:
328 info.uses_int64 = true;
329 break;
330 default:
331 break;
332 }
333 switch (inst.GetOpcode()) {
334 case IR::Opcode::WriteGlobalU8:
335 case IR::Opcode::WriteGlobalS8:
336 case IR::Opcode::WriteGlobalU16:
337 case IR::Opcode::WriteGlobalS16:
338 case IR::Opcode::WriteGlobal32:
339 case IR::Opcode::WriteGlobal64:
340 case IR::Opcode::WriteGlobal128:
341 case IR::Opcode::GlobalAtomicIAdd32:
342 case IR::Opcode::GlobalAtomicSMin32:
343 case IR::Opcode::GlobalAtomicUMin32:
344 case IR::Opcode::GlobalAtomicSMax32:
345 case IR::Opcode::GlobalAtomicUMax32:
346 case IR::Opcode::GlobalAtomicInc32:
347 case IR::Opcode::GlobalAtomicDec32:
348 case IR::Opcode::GlobalAtomicAnd32:
349 case IR::Opcode::GlobalAtomicOr32:
350 case IR::Opcode::GlobalAtomicXor32:
351 case IR::Opcode::GlobalAtomicExchange32:
352 case IR::Opcode::GlobalAtomicIAdd64:
353 case IR::Opcode::GlobalAtomicSMin64:
354 case IR::Opcode::GlobalAtomicUMin64:
355 case IR::Opcode::GlobalAtomicSMax64:
356 case IR::Opcode::GlobalAtomicUMax64:
357 case IR::Opcode::GlobalAtomicAnd64:
358 case IR::Opcode::GlobalAtomicOr64:
359 case IR::Opcode::GlobalAtomicXor64:
360 case IR::Opcode::GlobalAtomicExchange64:
361 case IR::Opcode::GlobalAtomicAddF32:
362 case IR::Opcode::GlobalAtomicAddF16x2:
363 case IR::Opcode::GlobalAtomicAddF32x2:
364 case IR::Opcode::GlobalAtomicMinF16x2:
365 case IR::Opcode::GlobalAtomicMinF32x2:
366 case IR::Opcode::GlobalAtomicMaxF16x2:
367 case IR::Opcode::GlobalAtomicMaxF32x2:
368 info.stores_global_memory = true;
369 [[fallthrough]];
370 case IR::Opcode::LoadGlobalU8:
371 case IR::Opcode::LoadGlobalS8:
372 case IR::Opcode::LoadGlobalU16:
373 case IR::Opcode::LoadGlobalS16:
374 case IR::Opcode::LoadGlobal32:
375 case IR::Opcode::LoadGlobal64:
376 case IR::Opcode::LoadGlobal128:
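        // Global memory is addressed through 64-bit pointers, and when it is later lowered to
        // storage buffer accesses (see the global memory pass) the buffer base addresses are read
        // from constant buffers, so both descriptor type sets are marked as used here.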
377 info.uses_int64 = true;
378 info.uses_global_memory = true;
379 info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
380 info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
381 break;
382 default:
383 break;
384 }
385 switch (inst.GetOpcode()) {
386 case IR::Opcode::DemoteToHelperInvocation:
387 info.uses_demote_to_helper_invocation = true;
388 break;
389 case IR::Opcode::GetAttribute:
390 info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
391 break;
392 case IR::Opcode::SetAttribute:
393 info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
394 break;
395 case IR::Opcode::GetPatch:
396 GetPatch(info, inst.Arg(0).Patch());
397 break;
398 case IR::Opcode::SetPatch:
399 SetPatch(info, inst.Arg(0).Patch());
400 break;
401 case IR::Opcode::GetAttributeIndexed:
402 info.loads_indexed_attributes = true;
403 break;
404 case IR::Opcode::SetAttributeIndexed:
405 info.stores_indexed_attributes = true;
406 break;
407 case IR::Opcode::SetFragColor:
408 info.stores_frag_color[inst.Arg(0).U32()] = true;
409 break;
410 case IR::Opcode::SetSampleMask:
411 info.stores_sample_mask = true;
412 break;
413 case IR::Opcode::SetFragDepth:
414 info.stores_frag_depth = true;
415 break;
416 case IR::Opcode::WorkgroupId:
417 info.uses_workgroup_id = true;
418 break;
419 case IR::Opcode::LocalInvocationId:
420 info.uses_local_invocation_id = true;
421 break;
422 case IR::Opcode::InvocationId:
423 info.uses_invocation_id = true;
424 break;
425 case IR::Opcode::SampleId:
426 info.uses_sample_id = true;
427 break;
428 case IR::Opcode::IsHelperInvocation:
429 info.uses_is_helper_invocation = true;
430 break;
431 case IR::Opcode::LaneId:
432 info.uses_subgroup_invocation_id = true;
433 break;
434 case IR::Opcode::ShuffleIndex:
435 case IR::Opcode::ShuffleUp:
436 case IR::Opcode::ShuffleDown:
437 case IR::Opcode::ShuffleButterfly:
438 info.uses_subgroup_shuffles = true;
439 break;
440 case IR::Opcode::GetCbufU8:
441 case IR::Opcode::GetCbufS8:
442 case IR::Opcode::GetCbufU16:
443 case IR::Opcode::GetCbufS16:
444 case IR::Opcode::GetCbufU32:
445 case IR::Opcode::GetCbufF32:
446 case IR::Opcode::GetCbufU32x2: {
447 const IR::Value index{inst.Arg(0)};
448 const IR::Value offset{inst.Arg(1)};
449 if (!index.IsImmediate()) {
450 throw NotImplementedException("Constant buffer with non-immediate index");
451 }
452 AddConstantBufferDescriptor(info, index.U32(), 1);
453 u32 element_size{};
454 switch (inst.GetOpcode()) {
455 case IR::Opcode::GetCbufU8:
456 case IR::Opcode::GetCbufS8:
457 info.used_constant_buffer_types |= IR::Type::U8;
458 element_size = 1;
459 break;
460 case IR::Opcode::GetCbufU16:
461 case IR::Opcode::GetCbufS16:
462 info.used_constant_buffer_types |= IR::Type::U16;
463 element_size = 2;
464 break;
465 case IR::Opcode::GetCbufU32:
466 info.used_constant_buffer_types |= IR::Type::U32;
467 element_size = 4;
468 break;
469 case IR::Opcode::GetCbufF32:
470 info.used_constant_buffer_types |= IR::Type::F32;
471 element_size = 4;
472 break;
473 case IR::Opcode::GetCbufU32x2:
474 info.used_constant_buffer_types |= IR::Type::U32x2;
475 element_size = 8;
476 break;
477 default:
478 break;
479 }
480 u32& size{info.constant_buffer_used_sizes[index.U32()]};
481 if (offset.IsImmediate()) {
482 size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
483 } else {
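            // Non-immediate offset: assume the entire 64 KiB constant buffer may be read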
484 size = 0x10'000;
485 }
486 break;
487 }
488 case IR::Opcode::BindlessImageSampleImplicitLod:
489 case IR::Opcode::BindlessImageSampleExplicitLod:
490 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
491 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
492 case IR::Opcode::BindlessImageGather:
493 case IR::Opcode::BindlessImageGatherDref:
494 case IR::Opcode::BindlessImageFetch:
495 case IR::Opcode::BindlessImageQueryDimensions:
496 case IR::Opcode::BindlessImageQueryLod:
497 case IR::Opcode::BindlessImageGradient:
498 case IR::Opcode::BoundImageSampleImplicitLod:
499 case IR::Opcode::BoundImageSampleExplicitLod:
500 case IR::Opcode::BoundImageSampleDrefImplicitLod:
501 case IR::Opcode::BoundImageSampleDrefExplicitLod:
502 case IR::Opcode::BoundImageGather:
503 case IR::Opcode::BoundImageGatherDref:
504 case IR::Opcode::BoundImageFetch:
505 case IR::Opcode::BoundImageQueryDimensions:
506 case IR::Opcode::BoundImageQueryLod:
507 case IR::Opcode::BoundImageGradient:
508 case IR::Opcode::ImageGather:
509 case IR::Opcode::ImageGatherDref:
510 case IR::Opcode::ImageFetch:
511 case IR::Opcode::ImageQueryDimensions:
512 case IR::Opcode::ImageGradient: {
513 const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
514 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
515 info.uses_sparse_residency |=
516 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
517 break;
518 }
519 case IR::Opcode::ImageSampleImplicitLod:
520 case IR::Opcode::ImageSampleExplicitLod:
521 case IR::Opcode::ImageSampleDrefImplicitLod:
522 case IR::Opcode::ImageSampleDrefExplicitLod:
523 case IR::Opcode::ImageQueryLod: {
524 const auto flags{inst.Flags<IR::TextureInstInfo>()};
525 const TextureType type{flags.type};
526 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
527 info.uses_shadow_lod |= flags.is_depth != 0;
528 info.uses_sparse_residency |=
529 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
530 break;
531 }
532 case IR::Opcode::ImageRead: {
533 const auto flags{inst.Flags<IR::TextureInstInfo>()};
534 info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless;
535 info.uses_sparse_residency |=
536 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
537 break;
538 }
539 case IR::Opcode::ImageWrite: {
540 const auto flags{inst.Flags<IR::TextureInstInfo>()};
541 info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
542 info.uses_image_buffers |= flags.type == TextureType::Buffer;
543 break;
544 }
545 case IR::Opcode::SubgroupEqMask:
546 case IR::Opcode::SubgroupLtMask:
547 case IR::Opcode::SubgroupLeMask:
548 case IR::Opcode::SubgroupGtMask:
549 case IR::Opcode::SubgroupGeMask:
550 info.uses_subgroup_mask = true;
551 break;
552 case IR::Opcode::VoteAll:
553 case IR::Opcode::VoteAny:
554 case IR::Opcode::VoteEqual:
555 case IR::Opcode::SubgroupBallot:
556 info.uses_subgroup_vote = true;
557 break;
558 case IR::Opcode::FSwizzleAdd:
559 info.uses_fswzadd = true;
560 break;
561 case IR::Opcode::DPdxFine:
562 case IR::Opcode::DPdyFine:
563 case IR::Opcode::DPdxCoarse:
564 case IR::Opcode::DPdyCoarse:
565 info.uses_derivatives = true;
566 break;
567 case IR::Opcode::LoadStorageU8:
568 case IR::Opcode::LoadStorageS8:
569 case IR::Opcode::WriteStorageU8:
570 case IR::Opcode::WriteStorageS8:
571 info.used_storage_buffer_types |= IR::Type::U8;
572 break;
573 case IR::Opcode::LoadStorageU16:
574 case IR::Opcode::LoadStorageS16:
575 case IR::Opcode::WriteStorageU16:
576 case IR::Opcode::WriteStorageS16:
577 info.used_storage_buffer_types |= IR::Type::U16;
578 break;
579 case IR::Opcode::LoadStorage32:
580 case IR::Opcode::WriteStorage32:
581 case IR::Opcode::StorageAtomicIAdd32:
582 case IR::Opcode::StorageAtomicUMin32:
583 case IR::Opcode::StorageAtomicUMax32:
584 case IR::Opcode::StorageAtomicAnd32:
585 case IR::Opcode::StorageAtomicOr32:
586 case IR::Opcode::StorageAtomicXor32:
587 case IR::Opcode::StorageAtomicExchange32:
588 info.used_storage_buffer_types |= IR::Type::U32;
589 break;
590 case IR::Opcode::LoadStorage64:
591 case IR::Opcode::WriteStorage64:
592 info.used_storage_buffer_types |= IR::Type::U32x2;
593 break;
594 case IR::Opcode::LoadStorage128:
595 case IR::Opcode::WriteStorage128:
596 info.used_storage_buffer_types |= IR::Type::U32x4;
597 break;
598 case IR::Opcode::SharedAtomicSMin32:
599 info.uses_atomic_s32_min = true;
600 break;
601 case IR::Opcode::SharedAtomicSMax32:
602 info.uses_atomic_s32_max = true;
603 break;
604 case IR::Opcode::SharedAtomicInc32:
605 info.uses_shared_increment = true;
606 break;
607 case IR::Opcode::SharedAtomicDec32:
608 info.uses_shared_decrement = true;
609 break;
610 case IR::Opcode::SharedAtomicExchange64:
611 info.uses_int64_bit_atomics = true;
612 break;
613 case IR::Opcode::GlobalAtomicInc32:
614 case IR::Opcode::StorageAtomicInc32:
615 info.used_storage_buffer_types |= IR::Type::U32;
616 info.uses_global_increment = true;
617 break;
618 case IR::Opcode::GlobalAtomicDec32:
619 case IR::Opcode::StorageAtomicDec32:
620 info.used_storage_buffer_types |= IR::Type::U32;
621 info.uses_global_decrement = true;
622 break;
623 case IR::Opcode::GlobalAtomicAddF32:
624 case IR::Opcode::StorageAtomicAddF32:
625 info.used_storage_buffer_types |= IR::Type::U32;
626 info.uses_atomic_f32_add = true;
627 break;
628 case IR::Opcode::GlobalAtomicAddF16x2:
629 case IR::Opcode::StorageAtomicAddF16x2:
630 info.used_storage_buffer_types |= IR::Type::U32;
631 info.uses_atomic_f16x2_add = true;
632 break;
633 case IR::Opcode::GlobalAtomicAddF32x2:
634 case IR::Opcode::StorageAtomicAddF32x2:
635 info.used_storage_buffer_types |= IR::Type::U32;
636 info.uses_atomic_f32x2_add = true;
637 break;
638 case IR::Opcode::GlobalAtomicMinF16x2:
639 case IR::Opcode::StorageAtomicMinF16x2:
640 info.used_storage_buffer_types |= IR::Type::U32;
641 info.uses_atomic_f16x2_min = true;
642 break;
643 case IR::Opcode::GlobalAtomicMinF32x2:
644 case IR::Opcode::StorageAtomicMinF32x2:
645 info.used_storage_buffer_types |= IR::Type::U32;
646 info.uses_atomic_f32x2_min = true;
647 break;
648 case IR::Opcode::GlobalAtomicMaxF16x2:
649 case IR::Opcode::StorageAtomicMaxF16x2:
650 info.used_storage_buffer_types |= IR::Type::U32;
651 info.uses_atomic_f16x2_max = true;
652 break;
653 case IR::Opcode::GlobalAtomicMaxF32x2:
654 case IR::Opcode::StorageAtomicMaxF32x2:
655 info.used_storage_buffer_types |= IR::Type::U32;
656 info.uses_atomic_f32x2_max = true;
657 break;
658 case IR::Opcode::StorageAtomicSMin32:
659 info.used_storage_buffer_types |= IR::Type::U32;
660 info.uses_atomic_s32_min = true;
661 break;
662 case IR::Opcode::StorageAtomicSMax32:
663 info.used_storage_buffer_types |= IR::Type::U32;
664 info.uses_atomic_s32_max = true;
665 break;
666 case IR::Opcode::GlobalAtomicIAdd64:
667 case IR::Opcode::GlobalAtomicSMin64:
668 case IR::Opcode::GlobalAtomicUMin64:
669 case IR::Opcode::GlobalAtomicSMax64:
670 case IR::Opcode::GlobalAtomicUMax64:
671 case IR::Opcode::GlobalAtomicAnd64:
672 case IR::Opcode::GlobalAtomicOr64:
673 case IR::Opcode::GlobalAtomicXor64:
674 case IR::Opcode::GlobalAtomicExchange64:
675 case IR::Opcode::StorageAtomicIAdd64:
676 case IR::Opcode::StorageAtomicSMin64:
677 case IR::Opcode::StorageAtomicUMin64:
678 case IR::Opcode::StorageAtomicSMax64:
679 case IR::Opcode::StorageAtomicUMax64:
680 case IR::Opcode::StorageAtomicAnd64:
681 case IR::Opcode::StorageAtomicOr64:
682 case IR::Opcode::StorageAtomicXor64:
683 info.used_storage_buffer_types |= IR::Type::U64;
684 info.uses_int64_bit_atomics = true;
685 break;
686 case IR::Opcode::BindlessImageAtomicIAdd32:
687 case IR::Opcode::BindlessImageAtomicSMin32:
688 case IR::Opcode::BindlessImageAtomicUMin32:
689 case IR::Opcode::BindlessImageAtomicSMax32:
690 case IR::Opcode::BindlessImageAtomicUMax32:
691 case IR::Opcode::BindlessImageAtomicInc32:
692 case IR::Opcode::BindlessImageAtomicDec32:
693 case IR::Opcode::BindlessImageAtomicAnd32:
694 case IR::Opcode::BindlessImageAtomicOr32:
695 case IR::Opcode::BindlessImageAtomicXor32:
696 case IR::Opcode::BindlessImageAtomicExchange32:
697 case IR::Opcode::BoundImageAtomicIAdd32:
698 case IR::Opcode::BoundImageAtomicSMin32:
699 case IR::Opcode::BoundImageAtomicUMin32:
700 case IR::Opcode::BoundImageAtomicSMax32:
701 case IR::Opcode::BoundImageAtomicUMax32:
702 case IR::Opcode::BoundImageAtomicInc32:
703 case IR::Opcode::BoundImageAtomicDec32:
704 case IR::Opcode::BoundImageAtomicAnd32:
705 case IR::Opcode::BoundImageAtomicOr32:
706 case IR::Opcode::BoundImageAtomicXor32:
707 case IR::Opcode::BoundImageAtomicExchange32:
708 case IR::Opcode::ImageAtomicIAdd32:
709 case IR::Opcode::ImageAtomicSMin32:
710 case IR::Opcode::ImageAtomicUMin32:
711 case IR::Opcode::ImageAtomicSMax32:
712 case IR::Opcode::ImageAtomicUMax32:
713 case IR::Opcode::ImageAtomicInc32:
714 case IR::Opcode::ImageAtomicDec32:
715 case IR::Opcode::ImageAtomicAnd32:
716 case IR::Opcode::ImageAtomicOr32:
717 case IR::Opcode::ImageAtomicXor32:
718 case IR::Opcode::ImageAtomicExchange32:
719 info.uses_atomic_image_u32 = true;
720 break;
721 default:
722 break;
723 }
724}
725
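// Records which floating-point denormal behaviours (flush-to-zero vs. preserve) the shader
// requires, tracked separately for FP16 and FP32 based on each instruction's FpControl flags.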
726void VisitFpModifiers(Info& info, IR::Inst& inst) {
727 switch (inst.GetOpcode()) {
728 case IR::Opcode::FPAdd16:
729 case IR::Opcode::FPFma16:
730 case IR::Opcode::FPMul16:
731 case IR::Opcode::FPRoundEven16:
732 case IR::Opcode::FPFloor16:
733 case IR::Opcode::FPCeil16:
734 case IR::Opcode::FPTrunc16: {
735 const auto control{inst.Flags<IR::FpControl>()};
736 switch (control.fmz_mode) {
737 case IR::FmzMode::DontCare:
738 break;
739 case IR::FmzMode::FTZ:
740 case IR::FmzMode::FMZ:
741 info.uses_fp16_denorms_flush = true;
742 break;
743 case IR::FmzMode::None:
744 info.uses_fp16_denorms_preserve = true;
745 break;
746 }
747 break;
748 }
749 case IR::Opcode::FPAdd32:
750 case IR::Opcode::FPFma32:
751 case IR::Opcode::FPMul32:
752 case IR::Opcode::FPRoundEven32:
753 case IR::Opcode::FPFloor32:
754 case IR::Opcode::FPCeil32:
755 case IR::Opcode::FPTrunc32:
756 case IR::Opcode::FPOrdEqual32:
757 case IR::Opcode::FPUnordEqual32:
758 case IR::Opcode::FPOrdNotEqual32:
759 case IR::Opcode::FPUnordNotEqual32:
760 case IR::Opcode::FPOrdLessThan32:
761 case IR::Opcode::FPUnordLessThan32:
762 case IR::Opcode::FPOrdGreaterThan32:
763 case IR::Opcode::FPUnordGreaterThan32:
764 case IR::Opcode::FPOrdLessThanEqual32:
765 case IR::Opcode::FPUnordLessThanEqual32:
766 case IR::Opcode::FPOrdGreaterThanEqual32:
767 case IR::Opcode::FPUnordGreaterThanEqual32:
768 case IR::Opcode::ConvertF16F32:
769 case IR::Opcode::ConvertF64F32: {
770 const auto control{inst.Flags<IR::FpControl>()};
771 switch (control.fmz_mode) {
772 case IR::FmzMode::DontCare:
773 break;
774 case IR::FmzMode::FTZ:
775 case IR::FmzMode::FMZ:
776 info.uses_fp32_denorms_flush = true;
777 break;
778 case IR::FmzMode::None:
779 info.uses_fp32_denorms_preserve = true;
780 break;
781 }
782 break;
783 }
784 default:
785 break;
786 }
787}
788
789void VisitCbufs(Info& info, IR::Inst& inst) {
790 switch (inst.GetOpcode()) {
791 case IR::Opcode::GetCbufU8:
792 case IR::Opcode::GetCbufS8:
793 case IR::Opcode::GetCbufU16:
794 case IR::Opcode::GetCbufS16:
795 case IR::Opcode::GetCbufU32:
796 case IR::Opcode::GetCbufF32:
797 case IR::Opcode::GetCbufU32x2: {
798 CheckCBufNVN(info, inst);
799 break;
800 }
801 default:
802 break;
803 }
804}
805
806void Visit(Info& info, IR::Inst& inst) {
807 VisitUsages(info, inst);
808 VisitFpModifiers(info, inst);
809 VisitCbufs(info, inst);
810}
811
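// Indexed attribute accesses cannot be resolved statically from the IR alone, so when they are
// present the attribute load/store masks are completed from the shader program header (SPH).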
812void GatherInfoFromHeader(Environment& env, Info& info) {
813 Stage stage{env.ShaderStage()};
814 if (stage == Stage::Compute) {
815 return;
816 }
817 const auto& header{env.SPH()};
818 if (stage == Stage::Fragment) {
819 if (!info.loads_indexed_attributes) {
820 return;
821 }
822 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
823 const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
824 const auto vector{header.ps.imap_generic_vector[index]};
825 info.loads.mask[offset + 0] = vector.x != PixelImap::Unused;
826 info.loads.mask[offset + 1] = vector.y != PixelImap::Unused;
827 info.loads.mask[offset + 2] = vector.z != PixelImap::Unused;
828 info.loads.mask[offset + 3] = vector.w != PixelImap::Unused;
829 }
830 return;
831 }
832 if (info.loads_indexed_attributes) {
833 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
834 const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
835 const auto mask = header.vtg.InputGeneric(index);
836 for (size_t i = 0; i < 4; ++i) {
837 info.loads.Set(attribute + i, mask[i]);
838 }
839 }
840 for (size_t index = 0; index < 8; ++index) {
841 const u16 mask{header.vtg.clip_distances};
842 info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
843 }
844 info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0);
845 info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0);
846 info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0);
847 info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0);
848 info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0);
849 info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0);
850 info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0);
851 info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0);
852 info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0);
853 info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0);
854 info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0);
855 info.loads.Set(IR::Attribute::TessellationEvaluationPointU,
856 header.vtg.tessellation_eval_point_u != 0);
857 info.loads.Set(IR::Attribute::TessellationEvaluationPointV,
858 header.vtg.tessellation_eval_point_v != 0);
859 info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0);
860 info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0);
861 // TODO: Legacy varyings
862 }
863 if (info.stores_indexed_attributes) {
864 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
865 const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
866 const auto mask{header.vtg.OutputGeneric(index)};
867 for (size_t i = 0; i < 4; ++i) {
868 info.stores.Set(attribute + i, mask[i]);
869 }
870 }
871 for (size_t index = 0; index < 8; ++index) {
872 const u16 mask{header.vtg.omap_systemc.clip_distances};
873 info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
874 }
875 info.stores.Set(IR::Attribute::PrimitiveId,
876 header.vtg.omap_systemb.primitive_array_id != 0);
877 info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0);
878 info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0);
879 info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0);
880 info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0);
881 info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0);
882 info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0);
883 info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0);
884 info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0);
885 info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0);
886 info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0);
887 info.stores.Set(IR::Attribute::TessellationEvaluationPointU,
888 header.vtg.omap_systemc.tessellation_eval_point_u != 0);
889 info.stores.Set(IR::Attribute::TessellationEvaluationPointV,
890 header.vtg.omap_systemc.tessellation_eval_point_v != 0);
891 info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0);
892 info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0);
893 // TODO: Legacy varyings
894 }
895}
896} // Anonymous namespace
897
898void CollectShaderInfoPass(Environment& env, IR::Program& program) {
899 Info& info{program.info};
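    // Base offset of the per-stage driver (NVN) reserved region inside the constant buffers;
    // the values below appear to follow the guest driver's per-stage layout.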
900 const u32 base{[&] {
901 switch (program.stage) {
902 case Stage::VertexA:
903 case Stage::VertexB:
904 return 0x110u;
905 case Stage::TessellationControl:
906 return 0x210u;
907 case Stage::TessellationEval:
908 return 0x310u;
909 case Stage::Geometry:
910 return 0x410u;
911 case Stage::Fragment:
912 return 0x510u;
913 case Stage::Compute:
914 return 0x310u;
915 }
916 throw InvalidArgument("Invalid stage {}", program.stage);
917 }()};
918 info.nvn_buffer_base = base;
919
920 for (IR::Block* const block : program.post_order_blocks) {
921 for (IR::Inst& inst : block->Instructions()) {
922 Visit(info, inst);
923 }
924 }
925 GatherInfoFromHeader(env, info);
926}
927
928} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..8dd6d6c2c
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,610 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7#include <type_traits>
8
9#include "common/bit_cast.h"
10#include "common/bit_util.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/ir_opt/passes.h"
15
16namespace Shader::Optimization {
17namespace {
18// Metaprogramming stuff to get argument information out of a lambda
19template <typename Func>
20struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
21
22template <typename ReturnType, typename LambdaType, typename... Args>
23struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
24 template <size_t I>
25 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
26
27 static constexpr size_t NUM_ARGS{sizeof...(Args)};
28};
29
30template <typename T>
31[[nodiscard]] T Arg(const IR::Value& value) {
32 if constexpr (std::is_same_v<T, bool>) {
33 return value.U1();
34 } else if constexpr (std::is_same_v<T, u32>) {
35 return value.U32();
36 } else if constexpr (std::is_same_v<T, s32>) {
37 return static_cast<s32>(value.U32());
38 } else if constexpr (std::is_same_v<T, f32>) {
39 return value.F32();
40 } else if constexpr (std::is_same_v<T, u64>) {
41 return value.U64();
42 }
43}
44
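// Folds a commutative operation: when both operands are immediate the instruction is replaced by
// the evaluated result (and false is returned); otherwise any immediate operand is normalized to
// the right-hand side and chains of the same operation with immediate right-hand sides are merged.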
45template <typename T, typename ImmFn>
46bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
47 const IR::Value lhs{inst.Arg(0)};
48 const IR::Value rhs{inst.Arg(1)};
49
50 const bool is_lhs_immediate{lhs.IsImmediate()};
51 const bool is_rhs_immediate{rhs.IsImmediate()};
52
53 if (is_lhs_immediate && is_rhs_immediate) {
54 const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
55 inst.ReplaceUsesWith(IR::Value{result});
56 return false;
57 }
58 if (is_lhs_immediate && !is_rhs_immediate) {
59 IR::Inst* const rhs_inst{rhs.InstRecursive()};
60 if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
61 const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
62 inst.SetArg(0, rhs_inst->Arg(0));
63 inst.SetArg(1, IR::Value{combined});
64 } else {
65 // Normalize
66 inst.SetArg(0, rhs);
67 inst.SetArg(1, lhs);
68 }
69 }
70 if (!is_lhs_immediate && is_rhs_immediate) {
71 const IR::Inst* const lhs_inst{lhs.InstRecursive()};
72 if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
73 const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
74 inst.SetArg(0, lhs_inst->Arg(0));
75 inst.SetArg(1, IR::Value{combined});
76 }
77 }
78 return true;
79}
80
81template <typename Func>
82bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
83 if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
84 return false;
85 }
86 using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
87 inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
88 return true;
89}
90
91void FoldGetRegister(IR::Inst& inst) {
92 if (inst.Arg(0).Reg() == IR::Reg::RZ) {
93 inst.ReplaceUsesWith(IR::Value{u32{0}});
94 }
95}
96
97void FoldGetPred(IR::Inst& inst) {
98 if (inst.Arg(0).Pred() == IR::Pred::PT) {
99 inst.ReplaceUsesWith(IR::Value{true});
100 }
101}
102
103/// Replaces the pattern generated by two XMAD multiplications
104bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
105 /*
106 * We are looking for this pattern:
107 * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
108 * %rhs_mul = IMul32 %rhs_bfe, %factor_b
109 * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
110 * %lhs_mul = IMul32 %lhs_bfe, %factor_b
111 * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
112 * %result = IAdd32 %lhs_shl, %rhs_mul
113 *
114 * And replacing it with
115 * %result = IMul32 %factor_a, %factor_b
116 *
117 * This optimization has been proven safe by LLVM and MSVC.
118 */
119 const IR::Value lhs_arg{inst.Arg(0)};
120 const IR::Value rhs_arg{inst.Arg(1)};
121 if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
122 return false;
123 }
124 IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
125 if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
126 lhs_shl->Arg(1) != IR::Value{16U}) {
127 return false;
128 }
129 if (lhs_shl->Arg(0).IsImmediate()) {
130 return false;
131 }
132 IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
133 IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
134 if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
135 return false;
136 }
137 if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
138 return false;
139 }
140 const IR::U32 factor_b{lhs_mul->Arg(1)};
141 if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
142 return false;
143 }
144 IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
145 IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
146 if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
147 return false;
148 }
149 if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
150 return false;
151 }
152 if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
153 return false;
154 }
155 if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
156 return false;
157 }
158 if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
159 return false;
160 }
161 const IR::U32 factor_a{lhs_bfe->Arg(0)};
162 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
163 inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
164 return true;
165}
166
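// Folds integer additions: constant-folds immediate operands, removes additions of zero and, for
// 32-bit additions, attempts to collapse the XMAD multiplication pattern above.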
167template <typename T>
168void FoldAdd(IR::Block& block, IR::Inst& inst) {
169 if (inst.HasAssociatedPseudoOperation()) {
170 return;
171 }
172 if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
173 return;
174 }
175 const IR::Value rhs{inst.Arg(1)};
176 if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
177 inst.ReplaceUsesWith(inst.Arg(0));
178 return;
179 }
180 if constexpr (std::is_same_v<T, u32>) {
181 if (FoldXmadMultiply(block, inst)) {
182 return;
183 }
184 }
185}
186
187void FoldISub32(IR::Inst& inst) {
188 if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
189 return;
190 }
191 if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
192 return;
193 }
194 // ISub32 is generally used to subtract two constant buffer reads; compare them and replace
195 // the result with zero when they are equal.
196 const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
197 return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
198 b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
199 a->Arg(1) == b->Arg(1);
200 }};
201 IR::Inst* op_a{inst.Arg(0).InstRecursive()};
202 IR::Inst* op_b{inst.Arg(1).InstRecursive()};
203 if (equal_cbuf(op_a, op_b)) {
204 inst.ReplaceUsesWith(IR::Value{u32{0}});
205 return;
206 }
207 // It's also possible that a value is added to a cbuf read and then subtracted from it
208 if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
209 // Canonicalize local variables to simplify the following logic
210 std::swap(op_a, op_b);
211 }
212 if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
213 return;
214 }
215 IR::Inst* const inst_cbuf{op_b};
216 if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
217 return;
218 }
219 IR::Value add_op_a{op_a->Arg(0)};
220 IR::Value add_op_b{op_a->Arg(1)};
221 if (add_op_b.IsImmediate()) {
222 // Canonicalize
223 std::swap(add_op_a, add_op_b);
224 }
225 if (add_op_b.IsImmediate()) {
226 return;
227 }
228 IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
229 if (equal_cbuf(add_cbuf, inst_cbuf)) {
230 inst.ReplaceUsesWith(add_op_a);
231 }
232}
233
234void FoldSelect(IR::Inst& inst) {
235 const IR::Value cond{inst.Arg(0)};
236 if (cond.IsImmediate()) {
237 inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
238 }
239}
240
241void FoldFPMul32(IR::Inst& inst) {
242 const auto control{inst.Flags<IR::FpControl>()};
243 if (control.no_contraction) {
244 return;
245 }
246 // Fold interpolation operations
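    // Matches FPMul32(FPMul32(x, GetAttribute(A)), FPRecip32(GetAttribute(A))) and replaces it
    // with x, cancelling the multiply and divide by the same attribute that these sequences emit.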
247 const IR::Value lhs_value{inst.Arg(0)};
248 const IR::Value rhs_value{inst.Arg(1)};
249 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
250 return;
251 }
252 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
253 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
254 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
255 rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
256 return;
257 }
258 const IR::Value recip_source{rhs_op->Arg(0)};
259 const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()};
260 if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) {
261 return;
262 }
263 IR::Inst* const attr_a{recip_source.InstRecursive()};
264 IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
265 if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
266 attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
267 return;
268 }
269 if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
270 inst.ReplaceUsesWith(lhs_op->Arg(0));
271 }
272}
273
274void FoldLogicalAnd(IR::Inst& inst) {
275 if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
276 return;
277 }
278 const IR::Value rhs{inst.Arg(1)};
279 if (rhs.IsImmediate()) {
280 if (rhs.U1()) {
281 inst.ReplaceUsesWith(inst.Arg(0));
282 } else {
283 inst.ReplaceUsesWith(IR::Value{false});
284 }
285 }
286}
287
288void FoldLogicalOr(IR::Inst& inst) {
289 if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
290 return;
291 }
292 const IR::Value rhs{inst.Arg(1)};
293 if (rhs.IsImmediate()) {
294 if (rhs.U1()) {
295 inst.ReplaceUsesWith(IR::Value{true});
296 } else {
297 inst.ReplaceUsesWith(inst.Arg(0));
298 }
299 }
300}
301
302void FoldLogicalNot(IR::Inst& inst) {
303 const IR::U1 value{inst.Arg(0)};
304 if (value.IsImmediate()) {
305 inst.ReplaceUsesWith(IR::Value{!value.U1()});
306 return;
307 }
308 IR::Inst* const arg{value.InstRecursive()};
309 if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
310 inst.ReplaceUsesWith(arg->Arg(0));
311 }
312}
313
314template <IR::Opcode op, typename Dest, typename Source>
315void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
316 const IR::Value value{inst.Arg(0)};
317 if (value.IsImmediate()) {
318 inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
319 return;
320 }
321 IR::Inst* const arg_inst{value.InstRecursive()};
322 if (arg_inst->GetOpcode() == reverse) {
323 inst.ReplaceUsesWith(arg_inst->Arg(0));
324 return;
325 }
326 if constexpr (op == IR::Opcode::BitCastF32U32) {
327 if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
328 // Replace the bitcast with a typed constant buffer read
329 inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
330 inst.SetArg(0, arg_inst->Arg(0));
331 inst.SetArg(1, arg_inst->Arg(1));
332 return;
333 }
334 }
335}
336
337void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
338 const IR::Value value{inst.Arg(0)};
339 if (value.IsImmediate()) {
340 return;
341 }
342 IR::Inst* const arg_inst{value.InstRecursive()};
343 if (arg_inst->GetOpcode() == reverse) {
344 inst.ReplaceUsesWith(arg_inst->Arg(0));
345 return;
346 }
347}
348
349template <typename Func, size_t... I>
350IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
351 using Traits = LambdaTraits<decltype(func)>;
352 return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
353}
354
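// Walks backwards through a chain of CompositeInsert instructions looking for the element at
// first_index: returns the inserted value on an index match, recurses into the previous composite
// otherwise, and falls back to the matching CompositeConstruct argument when one is reached.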
355std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
356 IR::Opcode construct, u32 first_index) {
357 IR::Inst* const inst{inst_value.InstRecursive()};
358 if (inst->GetOpcode() == construct) {
359 return inst->Arg(first_index);
360 }
361 if (inst->GetOpcode() != insert) {
362 return std::nullopt;
363 }
364 IR::Value value_index{inst->Arg(2)};
365 if (!value_index.IsImmediate()) {
366 return std::nullopt;
367 }
368 const u32 second_index{value_index.U32()};
369 if (first_index != second_index) {
370 IR::Value value_composite{inst->Arg(0)};
371 if (value_composite.IsImmediate()) {
372 return std::nullopt;
373 }
374 return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
375 }
376 return inst->Arg(1);
377}
378
379void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
380 const IR::Value value_1{inst.Arg(0)};
381 const IR::Value value_2{inst.Arg(1)};
382 if (value_1.IsImmediate()) {
383 return;
384 }
385 if (!value_2.IsImmediate()) {
386 return;
387 }
388 const u32 first_index{value_2.U32()};
389 const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
390 if (!result) {
391 return;
392 }
393 inst.ReplaceUsesWith(*result);
394}
395
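// Looks through an expected bitcast and returns its source operand; immediates and other
// instructions are returned unchanged.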
396IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) {
397 if (value.IsImmediate()) {
398 return value;
399 }
400 IR::Inst* const inst{value.InstRecursive()};
401 if (inst->GetOpcode() == expected_cast) {
402 return inst->Arg(0).Resolve();
403 }
404 return value;
405}
406
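// Recognizes the butterfly-shuffle plus FSwizzleAdd idiom used to compute screen-space
// derivatives and folds it into DPdxFine/DPdyFine; the swizzle constants 0x99 and 0xA5 appear to
// select the subtraction patterns for the X and Y derivatives respectively.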
407void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
408 const IR::Value swizzle{inst.Arg(2)};
409 if (!swizzle.IsImmediate()) {
410 return;
411 }
412 const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)};
413 const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)};
414 if (value_1.IsImmediate()) {
415 return;
416 }
417 const u32 swizzle_value{swizzle.U32()};
418 if (swizzle_value != 0x99 && swizzle_value != 0xA5) {
419 return;
420 }
421 IR::Inst* const inst2{value_1.InstRecursive()};
422 if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) {
423 return;
424 }
425 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
426 if (value_2 != value_3) {
427 return;
428 }
429 const IR::Value index{inst2->Arg(1)};
430 const IR::Value clamp{inst2->Arg(2)};
431 const IR::Value segmentation_mask{inst2->Arg(3)};
432 if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
433 return;
434 }
435 if (clamp.U32() != 3 || segmentation_mask.U32() != 28) {
436 return;
437 }
438 if (swizzle_value == 0x99) {
439 // DPdxFine
440 if (index.U32() == 1) {
441 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
442 inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)}));
443 }
444 } else if (swizzle_value == 0xA5) {
445 // DPdyFine
446 if (index.U32() == 2) {
447 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
448 inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)}));
449 }
450 }
451}
452
453void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
454 switch (inst.GetOpcode()) {
455 case IR::Opcode::GetRegister:
456 return FoldGetRegister(inst);
457 case IR::Opcode::GetPred:
458 return FoldGetPred(inst);
459 case IR::Opcode::IAdd32:
460 return FoldAdd<u32>(block, inst);
461 case IR::Opcode::ISub32:
462 return FoldISub32(inst);
463 case IR::Opcode::IMul32:
464 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
465 return;
466 case IR::Opcode::ShiftRightArithmetic32:
467 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
468 return;
469 case IR::Opcode::BitCastF32U32:
470 return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
471 case IR::Opcode::BitCastU32F32:
472 return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
473 case IR::Opcode::IAdd64:
474 return FoldAdd<u64>(block, inst);
475 case IR::Opcode::PackHalf2x16:
476 return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
477 case IR::Opcode::UnpackHalf2x16:
478 return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
479 case IR::Opcode::SelectU1:
480 case IR::Opcode::SelectU8:
481 case IR::Opcode::SelectU16:
482 case IR::Opcode::SelectU32:
483 case IR::Opcode::SelectU64:
484 case IR::Opcode::SelectF16:
485 case IR::Opcode::SelectF32:
486 case IR::Opcode::SelectF64:
487 return FoldSelect(inst);
488 case IR::Opcode::FPMul32:
489 return FoldFPMul32(inst);
490 case IR::Opcode::LogicalAnd:
491 return FoldLogicalAnd(inst);
492 case IR::Opcode::LogicalOr:
493 return FoldLogicalOr(inst);
494 case IR::Opcode::LogicalNot:
495 return FoldLogicalNot(inst);
496 case IR::Opcode::SLessThan:
497 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
498 return;
499 case IR::Opcode::ULessThan:
500 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
501 return;
502 case IR::Opcode::SLessThanEqual:
503 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
504 return;
505 case IR::Opcode::ULessThanEqual:
506 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
507 return;
508 case IR::Opcode::SGreaterThan:
509 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
510 return;
511 case IR::Opcode::UGreaterThan:
512 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
513 return;
514 case IR::Opcode::SGreaterThanEqual:
515 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
516 return;
517 case IR::Opcode::UGreaterThanEqual:
518 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
519 return;
520 case IR::Opcode::IEqual:
521 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
522 return;
523 case IR::Opcode::INotEqual:
524 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
525 return;
526 case IR::Opcode::BitwiseAnd32:
527 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
528 return;
529 case IR::Opcode::BitwiseOr32:
530 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
531 return;
532 case IR::Opcode::BitwiseXor32:
533 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
534 return;
535 case IR::Opcode::BitFieldUExtract:
536 FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
537 if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
538 throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
539 base, shift, count);
540 }
541 return (base >> shift) & ((1U << count) - 1);
542 });
543 return;
544 case IR::Opcode::BitFieldSExtract:
545 FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
546 const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
547 const size_t left_shift{32 - back_shift};
548 const size_t right_shift{static_cast<size_t>(32 - count)};
549 if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
550 throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
551 base, shift, count);
552 }
553 return static_cast<u32>((base << left_shift) >> right_shift);
554 });
555 return;
556 case IR::Opcode::BitFieldInsert:
557 FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
558 if (bits >= 32 || offset >= 32) {
559 throw LogicError("Undefined result in {}({}, {}, {}, {})",
560 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
561 }
562 return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
563 });
564 return;
565 case IR::Opcode::CompositeExtractU32x2:
566 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
567 IR::Opcode::CompositeInsertU32x2);
568 case IR::Opcode::CompositeExtractU32x3:
569 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
570 IR::Opcode::CompositeInsertU32x3);
571 case IR::Opcode::CompositeExtractU32x4:
572 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
573 IR::Opcode::CompositeInsertU32x4);
574 case IR::Opcode::CompositeExtractF32x2:
575 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
576 IR::Opcode::CompositeInsertF32x2);
577 case IR::Opcode::CompositeExtractF32x3:
578 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
579 IR::Opcode::CompositeInsertF32x3);
580 case IR::Opcode::CompositeExtractF32x4:
581 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
582 IR::Opcode::CompositeInsertF32x4);
583 case IR::Opcode::CompositeExtractF16x2:
584 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
585 IR::Opcode::CompositeInsertF16x2);
586 case IR::Opcode::CompositeExtractF16x3:
587 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
588 IR::Opcode::CompositeInsertF16x3);
589 case IR::Opcode::CompositeExtractF16x4:
590 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
591 IR::Opcode::CompositeInsertF16x4);
592 case IR::Opcode::FSwizzleAdd:
593 return FoldFSwizzleAdd(block, inst);
594 default:
595 break;
596 }
597}
598} // Anonymous namespace
599
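// Blocks are visited in reverse post order so that, in general, operands are folded before the
// instructions that consume them.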
600void ConstantPropagationPass(IR::Program& program) {
601 const auto end{program.post_order_blocks.rend()};
602 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
603 IR::Block* const block{*it};
604 for (IR::Inst& inst : block->Instructions()) {
605 ConstantPropagation(*block, inst);
606 }
607 }
608}
609
610} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..400836301
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/basic_block.h"
6#include "shader_recompiler/frontend/ir/value.h"
7#include "shader_recompiler/ir_opt/passes.h"
8
9namespace Shader::Optimization {
10
11void DeadCodeEliminationPass(IR::Program& program) {
12 // We iterate over the instructions in reverse order.
13 // This is because removing an instruction reduces the number of uses for earlier instructions.
14 for (IR::Block* const block : program.post_order_blocks) {
15 auto it{block->end()};
16 while (it != block->begin()) {
17 --it;
18 if (!it->HasUses() && !it->MayHaveSideEffects()) {
19 it->Invalidate();
20 it = block->Instructions().erase(it);
21 }
22 }
23 }
24}
25
26} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
new file mode 100644
index 000000000..055ba9c54
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/ir_emitter.h"
6#include "shader_recompiler/ir_opt/passes.h"
7
8namespace Shader::Optimization {
9
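// Invalidates the Epilogue of a vertex A program, presumably so that execution can fall through
// into the vertex B half when both programs are merged into a single dual-vertex shader.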
10void VertexATransformPass(IR::Program& program) {
11 for (IR::Block* const block : program.blocks) {
12 for (IR::Inst& inst : block->Instructions()) {
13 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
14 return inst.Invalidate();
15 }
16 }
17 }
18}
19
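// Invalidates the matching Prologue of the vertex B program for the same dual-vertex merge.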
20void VertexBTransformPass(IR::Program& program) {
21 for (IR::Block* const block : program.blocks) {
22 for (IR::Inst& inst : block->Instructions()) {
23 if (inst.GetOpcode() == IR::Opcode::Prologue) {
24 return inst.Invalidate();
25 }
26 }
27 }
28}
29
30} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..4197b0095
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,526 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <compare>
7#include <optional>
8#include <queue>
9
10#include <boost/container/flat_set.hpp>
11#include <boost/container/small_vector.hpp>
12
13#include "common/alignment.h"
14#include "shader_recompiler/frontend/ir/basic_block.h"
15#include "shader_recompiler/frontend/ir/breadth_first_search.h"
16#include "shader_recompiler/frontend/ir/ir_emitter.h"
17#include "shader_recompiler/frontend/ir/value.h"
18#include "shader_recompiler/ir_opt/passes.h"
19
20namespace Shader::Optimization {
21namespace {
22/// Location (constant buffer index and offset) of a storage buffer descriptor
23struct StorageBufferAddr {
24 auto operator<=>(const StorageBufferAddr&) const noexcept = default;
25
26 u32 index;
27 u32 offset;
28};
29
30/// Block iterator to a global memory instruction and the storage buffer it uses
31struct StorageInst {
32 StorageBufferAddr storage_buffer;
33 IR::Inst* inst;
34 IR::Block* block;
35};
36
37/// Bias towards a certain range of constant buffers when looking for storage buffers
38struct Bias {
39 u32 index;
40 u32 offset_begin;
41 u32 offset_end;
42};
43
44using boost::container::flat_set;
45using boost::container::small_vector;
46using StorageBufferSet =
47 flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
48using StorageInstVector = small_vector<StorageInst, 24>;
49using StorageWritesSet =
50 flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
51
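/// Working state of the pass: storage buffers found so far, the global memory instructions to
/// rewrite, and which of the buffers are written to.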
52struct StorageInfo {
53 StorageBufferSet set;
54 StorageInstVector to_replace;
55 StorageWritesSet writes;
56};
57
58/// Returns true when the instruction is a global memory instruction
59bool IsGlobalMemory(const IR::Inst& inst) {
60 switch (inst.GetOpcode()) {
61 case IR::Opcode::LoadGlobalS8:
62 case IR::Opcode::LoadGlobalU8:
63 case IR::Opcode::LoadGlobalS16:
64 case IR::Opcode::LoadGlobalU16:
65 case IR::Opcode::LoadGlobal32:
66 case IR::Opcode::LoadGlobal64:
67 case IR::Opcode::LoadGlobal128:
68 case IR::Opcode::WriteGlobalS8:
69 case IR::Opcode::WriteGlobalU8:
70 case IR::Opcode::WriteGlobalS16:
71 case IR::Opcode::WriteGlobalU16:
72 case IR::Opcode::WriteGlobal32:
73 case IR::Opcode::WriteGlobal64:
74 case IR::Opcode::WriteGlobal128:
75 case IR::Opcode::GlobalAtomicIAdd32:
76 case IR::Opcode::GlobalAtomicSMin32:
77 case IR::Opcode::GlobalAtomicUMin32:
78 case IR::Opcode::GlobalAtomicSMax32:
79 case IR::Opcode::GlobalAtomicUMax32:
80 case IR::Opcode::GlobalAtomicInc32:
81 case IR::Opcode::GlobalAtomicDec32:
82 case IR::Opcode::GlobalAtomicAnd32:
83 case IR::Opcode::GlobalAtomicOr32:
84 case IR::Opcode::GlobalAtomicXor32:
85 case IR::Opcode::GlobalAtomicExchange32:
86 case IR::Opcode::GlobalAtomicIAdd64:
87 case IR::Opcode::GlobalAtomicSMin64:
88 case IR::Opcode::GlobalAtomicUMin64:
89 case IR::Opcode::GlobalAtomicSMax64:
90 case IR::Opcode::GlobalAtomicUMax64:
91 case IR::Opcode::GlobalAtomicAnd64:
92 case IR::Opcode::GlobalAtomicOr64:
93 case IR::Opcode::GlobalAtomicXor64:
94 case IR::Opcode::GlobalAtomicExchange64:
95 case IR::Opcode::GlobalAtomicAddF32:
96 case IR::Opcode::GlobalAtomicAddF16x2:
97 case IR::Opcode::GlobalAtomicAddF32x2:
98 case IR::Opcode::GlobalAtomicMinF16x2:
99 case IR::Opcode::GlobalAtomicMinF32x2:
100 case IR::Opcode::GlobalAtomicMaxF16x2:
101 case IR::Opcode::GlobalAtomicMaxF32x2:
102 return true;
103 default:
104 return false;
105 }
106}
107
108/// Returns true when the instruction is a global memory write instruction
109bool IsGlobalMemoryWrite(const IR::Inst& inst) {
110 switch (inst.GetOpcode()) {
111 case IR::Opcode::WriteGlobalS8:
112 case IR::Opcode::WriteGlobalU8:
113 case IR::Opcode::WriteGlobalS16:
114 case IR::Opcode::WriteGlobalU16:
115 case IR::Opcode::WriteGlobal32:
116 case IR::Opcode::WriteGlobal64:
117 case IR::Opcode::WriteGlobal128:
118 case IR::Opcode::GlobalAtomicIAdd32:
119 case IR::Opcode::GlobalAtomicSMin32:
120 case IR::Opcode::GlobalAtomicUMin32:
121 case IR::Opcode::GlobalAtomicSMax32:
122 case IR::Opcode::GlobalAtomicUMax32:
123 case IR::Opcode::GlobalAtomicInc32:
124 case IR::Opcode::GlobalAtomicDec32:
125 case IR::Opcode::GlobalAtomicAnd32:
126 case IR::Opcode::GlobalAtomicOr32:
127 case IR::Opcode::GlobalAtomicXor32:
128 case IR::Opcode::GlobalAtomicExchange32:
129 case IR::Opcode::GlobalAtomicIAdd64:
130 case IR::Opcode::GlobalAtomicSMin64:
131 case IR::Opcode::GlobalAtomicUMin64:
132 case IR::Opcode::GlobalAtomicSMax64:
133 case IR::Opcode::GlobalAtomicUMax64:
134 case IR::Opcode::GlobalAtomicAnd64:
135 case IR::Opcode::GlobalAtomicOr64:
136 case IR::Opcode::GlobalAtomicXor64:
137 case IR::Opcode::GlobalAtomicExchange64:
138 case IR::Opcode::GlobalAtomicAddF32:
139 case IR::Opcode::GlobalAtomicAddF16x2:
140 case IR::Opcode::GlobalAtomicAddF32x2:
141 case IR::Opcode::GlobalAtomicMinF16x2:
142 case IR::Opcode::GlobalAtomicMinF32x2:
143 case IR::Opcode::GlobalAtomicMaxF16x2:
144 case IR::Opcode::GlobalAtomicMaxF32x2:
145 return true;
146 default:
147 return false;
148 }
149}
150
151/// Converts a global memory opcode to its storage buffer equivalent
152IR::Opcode GlobalToStorage(IR::Opcode opcode) {
153 switch (opcode) {
154 case IR::Opcode::LoadGlobalS8:
155 return IR::Opcode::LoadStorageS8;
156 case IR::Opcode::LoadGlobalU8:
157 return IR::Opcode::LoadStorageU8;
158 case IR::Opcode::LoadGlobalS16:
159 return IR::Opcode::LoadStorageS16;
160 case IR::Opcode::LoadGlobalU16:
161 return IR::Opcode::LoadStorageU16;
162 case IR::Opcode::LoadGlobal32:
163 return IR::Opcode::LoadStorage32;
164 case IR::Opcode::LoadGlobal64:
165 return IR::Opcode::LoadStorage64;
166 case IR::Opcode::LoadGlobal128:
167 return IR::Opcode::LoadStorage128;
168 case IR::Opcode::WriteGlobalS8:
169 return IR::Opcode::WriteStorageS8;
170 case IR::Opcode::WriteGlobalU8:
171 return IR::Opcode::WriteStorageU8;
172 case IR::Opcode::WriteGlobalS16:
173 return IR::Opcode::WriteStorageS16;
174 case IR::Opcode::WriteGlobalU16:
175 return IR::Opcode::WriteStorageU16;
176 case IR::Opcode::WriteGlobal32:
177 return IR::Opcode::WriteStorage32;
178 case IR::Opcode::WriteGlobal64:
179 return IR::Opcode::WriteStorage64;
180 case IR::Opcode::WriteGlobal128:
181 return IR::Opcode::WriteStorage128;
182 case IR::Opcode::GlobalAtomicIAdd32:
183 return IR::Opcode::StorageAtomicIAdd32;
184 case IR::Opcode::GlobalAtomicSMin32:
185 return IR::Opcode::StorageAtomicSMin32;
186 case IR::Opcode::GlobalAtomicUMin32:
187 return IR::Opcode::StorageAtomicUMin32;
188 case IR::Opcode::GlobalAtomicSMax32:
189 return IR::Opcode::StorageAtomicSMax32;
190 case IR::Opcode::GlobalAtomicUMax32:
191 return IR::Opcode::StorageAtomicUMax32;
192 case IR::Opcode::GlobalAtomicInc32:
193 return IR::Opcode::StorageAtomicInc32;
194 case IR::Opcode::GlobalAtomicDec32:
195 return IR::Opcode::StorageAtomicDec32;
196 case IR::Opcode::GlobalAtomicAnd32:
197 return IR::Opcode::StorageAtomicAnd32;
198 case IR::Opcode::GlobalAtomicOr32:
199 return IR::Opcode::StorageAtomicOr32;
200 case IR::Opcode::GlobalAtomicXor32:
201 return IR::Opcode::StorageAtomicXor32;
202 case IR::Opcode::GlobalAtomicIAdd64:
203 return IR::Opcode::StorageAtomicIAdd64;
204 case IR::Opcode::GlobalAtomicSMin64:
205 return IR::Opcode::StorageAtomicSMin64;
206 case IR::Opcode::GlobalAtomicUMin64:
207 return IR::Opcode::StorageAtomicUMin64;
208 case IR::Opcode::GlobalAtomicSMax64:
209 return IR::Opcode::StorageAtomicSMax64;
210 case IR::Opcode::GlobalAtomicUMax64:
211 return IR::Opcode::StorageAtomicUMax64;
212 case IR::Opcode::GlobalAtomicAnd64:
213 return IR::Opcode::StorageAtomicAnd64;
214 case IR::Opcode::GlobalAtomicOr64:
215 return IR::Opcode::StorageAtomicOr64;
216 case IR::Opcode::GlobalAtomicXor64:
217 return IR::Opcode::StorageAtomicXor64;
218 case IR::Opcode::GlobalAtomicExchange32:
219 return IR::Opcode::StorageAtomicExchange32;
220 case IR::Opcode::GlobalAtomicExchange64:
221 return IR::Opcode::StorageAtomicExchange64;
222 case IR::Opcode::GlobalAtomicAddF32:
223 return IR::Opcode::StorageAtomicAddF32;
224 case IR::Opcode::GlobalAtomicAddF16x2:
225 return IR::Opcode::StorageAtomicAddF16x2;
226 case IR::Opcode::GlobalAtomicMinF16x2:
227 return IR::Opcode::StorageAtomicMinF16x2;
228 case IR::Opcode::GlobalAtomicMaxF16x2:
229 return IR::Opcode::StorageAtomicMaxF16x2;
230 case IR::Opcode::GlobalAtomicAddF32x2:
231 return IR::Opcode::StorageAtomicAddF32x2;
232 case IR::Opcode::GlobalAtomicMinF32x2:
233 return IR::Opcode::StorageAtomicMinF32x2;
234 case IR::Opcode::GlobalAtomicMaxF32x2:
235 return IR::Opcode::StorageAtomicMaxF32x2;
236 default:
237 throw InvalidArgument("Invalid global memory opcode {}", opcode);
238 }
239}
240
241/// Returns true when a storage buffer address satisfies a bias
242bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
243 return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
244 storage_buffer.offset < bias.offset_end;
245}
246
247struct LowAddrInfo {
248 IR::U32 value;
249 s32 imm_offset;
250};
251
252/// Tries to track the lower 32 bits of a global memory instruction's address
253std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
254 // The first argument is the low-level GPU pointer used by the global memory instruction
255 const IR::Value addr{inst->Arg(0)};
256 if (addr.IsImmediate()) {
257 // Not much we can do if it's an immediate
258 return std::nullopt;
259 }
260 // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2
261 IR::Inst* addr_inst{addr.InstRecursive()};
262 s32 imm_offset{0};
263 if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
264 // If it's an IAdd64, get the immediate offset it is applying and grab the address
265 // instruction. This expects the instruction to be canonicalized, with the address as
266 // the first argument and the immediate offset as the second one.
267 const IR::U64 imm_offset_value{addr_inst->Arg(1)};
268 if (!imm_offset_value.IsImmediate()) {
269 return std::nullopt;
270 }
271 imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
272 const IR::U64 iadd_addr{addr_inst->Arg(0)};
273 if (iadd_addr.IsImmediate()) {
274 return std::nullopt;
275 }
276 addr_inst = iadd_addr.InstRecursive();
277 }
278 // With IAdd64 handled, a PackUint2x32 is expected next
279 if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
280 // PackUint2x32 is expected to be generated from a vector
281 const IR::Value vector{addr_inst->Arg(0)};
282 if (vector.IsImmediate()) {
283 return std::nullopt;
284 }
285 addr_inst = vector.InstRecursive();
286 }
287 // The vector is expected to be a CompositeConstructU32x2
288 if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
289 return std::nullopt;
290 }
291 // Grab the first argument from the CompositeConstructU32x2; this is the low address.
292 return LowAddrInfo{
293 .value{IR::U32{addr_inst->Arg(0)}},
294 .imm_offset = imm_offset,
295 };
296}
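// Note (illustrative, not from the source): TrackLowAddress expects the 64-bit address to
// look roughly like
//   addr = IAdd64(PackUint2x32(CompositeConstructU32x2(low, high)), imm)
// with the IAdd64 and PackUint2x32 layers optional; it returns `low` together with the
// signed immediate `imm` (0 when there is no IAdd64).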
297
298/// Tries to track the storage buffer address used by a global memory instruction
299std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
300 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
301 if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
302 return std::nullopt;
303 }
304 const IR::Value index{inst->Arg(0)};
305 const IR::Value offset{inst->Arg(1)};
306 if (!index.IsImmediate()) {
307 // Definitely not a storage buffer if it's read from a
308 // non-immediate index
309 return std::nullopt;
310 }
311 if (!offset.IsImmediate()) {
312 // TODO: Support SSBO arrays
313 return std::nullopt;
314 }
315 const StorageBufferAddr storage_buffer{
316 .index = index.U32(),
317 .offset = offset.U32(),
318 };
319 if (!Common::IsAligned(storage_buffer.offset, 16)) {
320 // The SSBO pointer has to be aligned
321 return std::nullopt;
322 }
323 if (bias && !MeetsBias(storage_buffer, *bias)) {
324 // Reject addresses outside the bias range; they are likely
325 // false positives
326 return std::nullopt;
327 }
328 return storage_buffer;
329 }};
330 return BreadthFirstSearch(value, pred);
331}
332
333/// Collects the storage buffer used by a global memory instruction and the instruction itself
334void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
335 // NVN puts storage buffers in a specific constant buffer range; bias towards these
336 // addresses to avoid false positives
337 static constexpr Bias nvn_bias{
338 .index = 0,
339 .offset_begin = 0x110,
340 .offset_end = 0x610,
341 };
342 // Track the low address of the instruction
343 const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
344 if (!low_addr_info) {
345 // Failed to track the low address, use NVN fallbacks
346 return;
347 }
348 // First try to find storage buffers in the NVN address range
349 const IR::U32 low_addr{low_addr_info->value};
350 std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
351 if (!storage_buffer) {
352 // If it fails, track without a bias
353 storage_buffer = Track(low_addr, nullptr);
354 if (!storage_buffer) {
355 // If that also fails, use NVN fallbacks
356 return;
357 }
358 }
359 // Collect the storage buffer and the instruction
360 if (IsGlobalMemoryWrite(inst)) {
361 info.writes.insert(*storage_buffer);
362 }
363 info.set.insert(*storage_buffer);
364 info.to_replace.push_back(StorageInst{
365 .storage_buffer{*storage_buffer},
366 .inst = &inst,
367 .block = &block,
368 });
369}
370
371/// Returns the offset in bytes for an equivalent storage instruction
372IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
373 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
374 IR::U32 offset;
375 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
376 offset = low_addr->value;
377 if (low_addr->imm_offset != 0) {
378 offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
379 }
380 } else {
381 offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
382 }
383 // Subtract the least significant 32 bits of the SSBO pointer (read from the constant
384 // buffer) from the guest address. The result is the storage buffer offset in bytes.
385 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
386 return ir.ISub(offset, low_cbuf);
387}
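// Sketch of the computation above (names are illustrative): with `base` being the SSBO
// pointer stored at cbuf[buffer.index][buffer.offset], the returned value is
//   offset_bytes = (low32(global_address) + imm_offset) - low32(base)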
388
389/// Replace a global memory load instruction with its storage buffer equivalent
390void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
391 const IR::U32& offset) {
392 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
393 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
394 const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
395 inst.ReplaceUsesWith(value);
396}
397
398/// Replace a global memory write instruction with its storage buffer equivalent
399void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
400 const IR::U32& offset) {
401 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
402 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
403 block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
404 inst.Invalidate();
405}
406
407/// Replace an atomic operation on global memory with its storage buffer equivalent
408void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
409 const IR::U32& offset) {
410 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
411 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
412 const IR::Value value{
413 &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
414 inst.ReplaceUsesWith(value);
415}
416
417/// Replace a global memory instruction with its storage buffer equivalent
418void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
419 const IR::U32& offset) {
420 switch (inst.GetOpcode()) {
421 case IR::Opcode::LoadGlobalS8:
422 case IR::Opcode::LoadGlobalU8:
423 case IR::Opcode::LoadGlobalS16:
424 case IR::Opcode::LoadGlobalU16:
425 case IR::Opcode::LoadGlobal32:
426 case IR::Opcode::LoadGlobal64:
427 case IR::Opcode::LoadGlobal128:
428 return ReplaceLoad(block, inst, storage_index, offset);
429 case IR::Opcode::WriteGlobalS8:
430 case IR::Opcode::WriteGlobalU8:
431 case IR::Opcode::WriteGlobalS16:
432 case IR::Opcode::WriteGlobalU16:
433 case IR::Opcode::WriteGlobal32:
434 case IR::Opcode::WriteGlobal64:
435 case IR::Opcode::WriteGlobal128:
436 return ReplaceWrite(block, inst, storage_index, offset);
437 case IR::Opcode::GlobalAtomicIAdd32:
438 case IR::Opcode::GlobalAtomicSMin32:
439 case IR::Opcode::GlobalAtomicUMin32:
440 case IR::Opcode::GlobalAtomicSMax32:
441 case IR::Opcode::GlobalAtomicUMax32:
442 case IR::Opcode::GlobalAtomicInc32:
443 case IR::Opcode::GlobalAtomicDec32:
444 case IR::Opcode::GlobalAtomicAnd32:
445 case IR::Opcode::GlobalAtomicOr32:
446 case IR::Opcode::GlobalAtomicXor32:
447 case IR::Opcode::GlobalAtomicExchange32:
448 case IR::Opcode::GlobalAtomicIAdd64:
449 case IR::Opcode::GlobalAtomicSMin64:
450 case IR::Opcode::GlobalAtomicUMin64:
451 case IR::Opcode::GlobalAtomicSMax64:
452 case IR::Opcode::GlobalAtomicUMax64:
453 case IR::Opcode::GlobalAtomicAnd64:
454 case IR::Opcode::GlobalAtomicOr64:
455 case IR::Opcode::GlobalAtomicXor64:
456 case IR::Opcode::GlobalAtomicExchange64:
457 case IR::Opcode::GlobalAtomicAddF32:
458 case IR::Opcode::GlobalAtomicAddF16x2:
459 case IR::Opcode::GlobalAtomicAddF32x2:
460 case IR::Opcode::GlobalAtomicMinF16x2:
461 case IR::Opcode::GlobalAtomicMinF32x2:
462 case IR::Opcode::GlobalAtomicMaxF16x2:
463 case IR::Opcode::GlobalAtomicMaxF32x2:
464 return ReplaceAtomic(block, inst, storage_index, offset);
465 default:
466 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
467 }
468}
469} // Anonymous namespace
470
471void GlobalMemoryToStorageBufferPass(IR::Program& program) {
472 StorageInfo info;
473 for (IR::Block* const block : program.post_order_blocks) {
474 for (IR::Inst& inst : block->Instructions()) {
475 if (!IsGlobalMemory(inst)) {
476 continue;
477 }
478 CollectStorageBuffers(*block, inst, info);
479 }
480 }
481 for (const StorageBufferAddr& storage_buffer : info.set) {
482 program.info.storage_buffers_descriptors.push_back({
483 .cbuf_index = storage_buffer.index,
484 .cbuf_offset = storage_buffer.offset,
485 .count = 1,
486 .is_written = info.writes.contains(storage_buffer),
487 });
488 }
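    // Note: info.set is an ordered set, so the descriptor pushed above for the N-th element
    // is addressed by index_of() == N in the replacement loop below.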
489 for (const StorageInst& storage_inst : info.to_replace) {
490 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
491 const auto it{info.set.find(storage_inst.storage_buffer)};
492 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
493 IR::Block* const block{storage_inst.block};
494 IR::Inst* const inst{storage_inst.inst};
495 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
496 Replace(*block, *inst, index, offset);
497 }
498}
499
500template <typename Descriptors, typename Descriptor, typename Func>
501static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
502 // TODO: Handle arrays
503 const auto it{std::ranges::find_if(descriptors, pred)};
504 if (it != descriptors.end()) {
505 return static_cast<u32>(std::distance(descriptors.begin(), it));
506 }
507 descriptors.push_back(desc);
508 return static_cast<u32>(descriptors.size()) - 1;
509}
510
511void JoinStorageInfo(Info& base, Info& source) {
512 auto& descriptors = base.storage_buffers_descriptors;
513 for (auto& desc : source.storage_buffers_descriptors) {
514 auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
515 return desc.cbuf_index == existing.cbuf_index &&
516 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
517 })};
518 if (it != descriptors.end()) {
519 it->is_written |= desc.is_written;
520 continue;
521 }
522 descriptors.push_back(desc);
523 }
524}
525
526} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..e9b55f835
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "shader_recompiler/frontend/ir/basic_block.h"
8#include "shader_recompiler/frontend/ir/value.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12
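// Identity instructions forward a value to their single argument (they are produced, for
// example, when an instruction's uses are replaced). This pass folds chains of identities
// into direct arguments and then erases the leftover Identity/Void instructions.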
13void IdentityRemovalPass(IR::Program& program) {
14 std::vector<IR::Inst*> to_invalidate;
15 for (IR::Block* const block : program.blocks) {
16 for (auto inst = block->begin(); inst != block->end();) {
17 const size_t num_args{inst->NumArgs()};
18 for (size_t i = 0; i < num_args; ++i) {
19 IR::Value arg;
20 while ((arg = inst->Arg(i)).IsIdentity()) {
21 inst->SetArg(i, arg.Inst()->Arg(0));
22 }
23 }
24 if (inst->GetOpcode() == IR::Opcode::Identity ||
25 inst->GetOpcode() == IR::Opcode::Void) {
26 to_invalidate.push_back(&*inst);
27 inst = block->Instructions().erase(inst);
28 } else {
29 ++inst;
30 }
31 }
32 }
33 for (IR::Inst* const inst : to_invalidate) {
34 inst->Invalidate();
35 }
36}
37
38} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 000000000..773e1f961
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/value.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12namespace {
13IR::Opcode Replace(IR::Opcode op) {
14 switch (op) {
15 case IR::Opcode::FPAbs16:
16 return IR::Opcode::FPAbs32;
17 case IR::Opcode::FPAdd16:
18 return IR::Opcode::FPAdd32;
19 case IR::Opcode::FPCeil16:
20 return IR::Opcode::FPCeil32;
21 case IR::Opcode::FPFloor16:
22 return IR::Opcode::FPFloor32;
23 case IR::Opcode::FPFma16:
24 return IR::Opcode::FPFma32;
25 case IR::Opcode::FPMul16:
26 return IR::Opcode::FPMul32;
27 case IR::Opcode::FPNeg16:
28 return IR::Opcode::FPNeg32;
29 case IR::Opcode::FPRoundEven16:
30 return IR::Opcode::FPRoundEven32;
31 case IR::Opcode::FPSaturate16:
32 return IR::Opcode::FPSaturate32;
33 case IR::Opcode::FPClamp16:
34 return IR::Opcode::FPClamp32;
35 case IR::Opcode::FPTrunc16:
36 return IR::Opcode::FPTrunc32;
37 case IR::Opcode::CompositeConstructF16x2:
38 return IR::Opcode::CompositeConstructF32x2;
39 case IR::Opcode::CompositeConstructF16x3:
40 return IR::Opcode::CompositeConstructF32x3;
41 case IR::Opcode::CompositeConstructF16x4:
42 return IR::Opcode::CompositeConstructF32x4;
43 case IR::Opcode::CompositeExtractF16x2:
44 return IR::Opcode::CompositeExtractF32x2;
45 case IR::Opcode::CompositeExtractF16x3:
46 return IR::Opcode::CompositeExtractF32x3;
47 case IR::Opcode::CompositeExtractF16x4:
48 return IR::Opcode::CompositeExtractF32x4;
49 case IR::Opcode::CompositeInsertF16x2:
50 return IR::Opcode::CompositeInsertF32x2;
51 case IR::Opcode::CompositeInsertF16x3:
52 return IR::Opcode::CompositeInsertF32x3;
53 case IR::Opcode::CompositeInsertF16x4:
54 return IR::Opcode::CompositeInsertF32x4;
55 case IR::Opcode::FPOrdEqual16:
56 return IR::Opcode::FPOrdEqual32;
57 case IR::Opcode::FPUnordEqual16:
58 return IR::Opcode::FPUnordEqual32;
59 case IR::Opcode::FPOrdNotEqual16:
60 return IR::Opcode::FPOrdNotEqual32;
61 case IR::Opcode::FPUnordNotEqual16:
62 return IR::Opcode::FPUnordNotEqual32;
63 case IR::Opcode::FPOrdLessThan16:
64 return IR::Opcode::FPOrdLessThan32;
65 case IR::Opcode::FPUnordLessThan16:
66 return IR::Opcode::FPUnordLessThan32;
67 case IR::Opcode::FPOrdGreaterThan16:
68 return IR::Opcode::FPOrdGreaterThan32;
69 case IR::Opcode::FPUnordGreaterThan16:
70 return IR::Opcode::FPUnordGreaterThan32;
71 case IR::Opcode::FPOrdLessThanEqual16:
72 return IR::Opcode::FPOrdLessThanEqual32;
73 case IR::Opcode::FPUnordLessThanEqual16:
74 return IR::Opcode::FPUnordLessThanEqual32;
75 case IR::Opcode::FPOrdGreaterThanEqual16:
76 return IR::Opcode::FPOrdGreaterThanEqual32;
77 case IR::Opcode::FPUnordGreaterThanEqual16:
78 return IR::Opcode::FPUnordGreaterThanEqual32;
79 case IR::Opcode::FPIsNan16:
80 return IR::Opcode::FPIsNan32;
81 case IR::Opcode::ConvertS16F16:
82 return IR::Opcode::ConvertS16F32;
83 case IR::Opcode::ConvertS32F16:
84 return IR::Opcode::ConvertS32F32;
85 case IR::Opcode::ConvertS64F16:
86 return IR::Opcode::ConvertS64F32;
87 case IR::Opcode::ConvertU16F16:
88 return IR::Opcode::ConvertU16F32;
89 case IR::Opcode::ConvertU32F16:
90 return IR::Opcode::ConvertU32F32;
91 case IR::Opcode::ConvertU64F16:
92 return IR::Opcode::ConvertU64F32;
93 case IR::Opcode::PackFloat2x16:
94 return IR::Opcode::PackHalf2x16;
95 case IR::Opcode::UnpackFloat2x16:
96 return IR::Opcode::UnpackHalf2x16;
97 case IR::Opcode::ConvertF32F16:
98 return IR::Opcode::Identity;
99 case IR::Opcode::ConvertF16F32:
100 return IR::Opcode::Identity;
101 case IR::Opcode::ConvertF16S8:
102 return IR::Opcode::ConvertF32S8;
103 case IR::Opcode::ConvertF16S16:
104 return IR::Opcode::ConvertF32S16;
105 case IR::Opcode::ConvertF16S32:
106 return IR::Opcode::ConvertF32S32;
107 case IR::Opcode::ConvertF16S64:
108 return IR::Opcode::ConvertF32S64;
109 case IR::Opcode::ConvertF16U8:
110 return IR::Opcode::ConvertF32U8;
111 case IR::Opcode::ConvertF16U16:
112 return IR::Opcode::ConvertF32U16;
113 case IR::Opcode::ConvertF16U32:
114 return IR::Opcode::ConvertF32U32;
115 case IR::Opcode::ConvertF16U64:
116 return IR::Opcode::ConvertF32U64;
117 case IR::Opcode::GlobalAtomicAddF16x2:
118 return IR::Opcode::GlobalAtomicAddF32x2;
119 case IR::Opcode::StorageAtomicAddF16x2:
120 return IR::Opcode::StorageAtomicAddF32x2;
121 case IR::Opcode::GlobalAtomicMinF16x2:
122 return IR::Opcode::GlobalAtomicMinF32x2;
123 case IR::Opcode::StorageAtomicMinF16x2:
124 return IR::Opcode::StorageAtomicMinF32x2;
125 case IR::Opcode::GlobalAtomicMaxF16x2:
126 return IR::Opcode::GlobalAtomicMaxF32x2;
127 case IR::Opcode::StorageAtomicMaxF16x2:
128 return IR::Opcode::StorageAtomicMaxF32x2;
129 default:
130 return op;
131 }
132}
133} // Anonymous namespace
134
135void LowerFp16ToFp32(IR::Program& program) {
136 for (IR::Block* const block : program.blocks) {
137 for (IR::Inst& inst : block->Instructions()) {
138 inst.ReplaceOpcode(Replace(inst.GetOpcode()));
139 }
140 }
141}
142
143} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..e80d3d1d9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/frontend/ir/value.h"
12#include "shader_recompiler/ir_opt/passes.h"
13
14namespace Shader::Optimization {
15namespace {
16std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
17 if (packed.IsImmediate()) {
18 const u64 value{packed.U64()};
19 return {
20 ir.Imm32(static_cast<u32>(value)),
21 ir.Imm32(static_cast<u32>(value >> 32)),
22 };
23 } else {
24 return std::pair<IR::U32, IR::U32>{
25 ir.CompositeExtract(packed, 0u),
26 ir.CompositeExtract(packed, 1u),
27 };
28 }
29}
30
31void IAdd64To32(IR::Block& block, IR::Inst& inst) {
32 if (inst.HasAssociatedPseudoOperation()) {
33 throw NotImplementedException("IAdd64 emulation with pseudo instructions");
34 }
35 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
36 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
37 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
38
39 const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
40 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
41
42 const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
43 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
44}
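// Worked example with assumed operands: 0x0000'0001'ffff'ffff + 1 -> ret_lo = 0 with the
// carry flag set, ret_hi = 1 + 0 + 1 = 2, i.e. the pair represents 0x0000'0002'0000'0000.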
45
46void ISub64To32(IR::Block& block, IR::Inst& inst) {
47 if (inst.HasAssociatedPseudoOperation()) {
48 throw NotImplementedException("ISub64 emulation with pseudo instructions");
49 }
50 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
51 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
52 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
53
54 const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
55 const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
56 const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
57
58 const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
59 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
60}
61
62void INeg64To32(IR::Block& block, IR::Inst& inst) {
63 if (inst.HasAssociatedPseudoOperation()) {
64 throw NotImplementedException("INeg64 emulation with pseudo instructions");
65 }
66 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
67 auto [lo, hi]{Unpack(ir, inst.Arg(0))};
68 lo = ir.BitwiseNot(lo);
69 hi = ir.BitwiseNot(hi);
70
71 lo = ir.IAdd(lo, ir.Imm32(1));
72
73 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
74 hi = ir.IAdd(hi, carry);
75
76 inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
77}
78
79void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
80 if (inst.HasAssociatedPseudoOperation()) {
81 throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
82 }
83 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
84 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
85 const IR::U32 shift{inst.Arg(1)};
86
87 const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
88 const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
89
90 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
91 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
92 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
93
94 const IR::U32 long_ret_lo{ir.Imm32(0)};
95 const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
96
97 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
98 const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
99 const IR::U32 short_ret_lo{shifted_lo};
100 const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
101
102 const IR::U32 zero_ret_lo{lo};
103 const IR::U32 zero_ret_hi{hi};
104
105 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
106 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
107
108 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
109 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
110 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
111}
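// The selects above distinguish three cases (sketch): shift == 0 returns (lo, hi) unchanged,
// shift >= 32 moves lo into hi (shifted by shift - 32) and zeroes lo, and 0 < shift < 32
// shifts both halves and ORs the bits spilled out of lo into hi.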
112
113void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
114 if (inst.HasAssociatedPseudoOperation()) {
115 throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
116 }
117 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
118 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
119 const IR::U32 shift{inst.Arg(1)};
120
121 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
122 const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
123
124 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
125 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
126 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
127
128 const IR::U32 long_ret_hi{ir.Imm32(0)};
129 const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
130
131 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
132 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
133 const IR::U32 short_ret_hi{shifted_hi};
134 const IR::U32 short_ret_lo{
135 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
136
137 const IR::U32 zero_ret_lo{lo};
138 const IR::U32 zero_ret_hi{hi};
139
140 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
141 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
142
143 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
144 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
145 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
146}
147
148void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
149 if (inst.HasAssociatedPseudoOperation()) {
150 throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
151 }
152 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
153 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
154 const IR::U32 shift{inst.Arg(1)};
155
156 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
157 const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
158
159 const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
160
161 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
162 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
163 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
164
165 const IR::U32 long_ret_hi{sign_extension};
166 const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
167
168 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
169 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
170 const IR::U32 short_ret_hi{shifted_hi};
171 const IR::U32 short_ret_lo{
172 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
173
174 const IR::U32 zero_ret_lo{lo};
175 const IR::U32 zero_ret_hi{hi};
176
177 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
178 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
179
180 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
181 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
182 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
183}
184
185void Lower(IR::Block& block, IR::Inst& inst) {
186 switch (inst.GetOpcode()) {
187 case IR::Opcode::PackUint2x32:
188 case IR::Opcode::UnpackUint2x32:
189 return inst.ReplaceOpcode(IR::Opcode::Identity);
190 case IR::Opcode::IAdd64:
191 return IAdd64To32(block, inst);
192 case IR::Opcode::ISub64:
193 return ISub64To32(block, inst);
194 case IR::Opcode::INeg64:
195 return INeg64To32(block, inst);
196 case IR::Opcode::ShiftLeftLogical64:
197 return ShiftLeftLogical64To32(block, inst);
198 case IR::Opcode::ShiftRightLogical64:
199 return ShiftRightLogical64To32(block, inst);
200 case IR::Opcode::ShiftRightArithmetic64:
201 return ShiftRightArithmetic64To32(block, inst);
202 default:
203 break;
204 }
205}
206} // Anonymous namespace
207
208void LowerInt64ToInt32(IR::Program& program) {
209 const auto end{program.post_order_blocks.rend()};
210 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
211 IR::Block* const block{*it};
212 for (IR::Inst& inst : block->Instructions()) {
213 Lower(*block, inst);
214 }
215 }
216}
217
218} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..2f89b1ea0
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "shader_recompiler/environment.h"
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/program.h"
12
13namespace Shader::Optimization {
14
15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program);
22void SsaRewritePass(IR::Program& program);
23void TexturePass(Environment& env, IR::Program& program);
24void VerificationPass(const IR::Program& program);
25
26// Dual Vertex
27void VertexATransformPass(IR::Program& program);
28void VertexBTransformPass(IR::Program& program);
29void JoinTextureInfo(Info& base, Info& source);
30void JoinStorageInfo(Info& base, Info& source);
31
32} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
new file mode 100644
index 000000000..53145fb5e
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -0,0 +1,383 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file implements the SSA rewriting algorithm proposed in
6//
7// Simple and Efficient Construction of Static Single Assignment Form.
8// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
9// In: Jhala R., De Bosschere K. (eds)
10// Compiler Construction. CC 2013.
11// Lecture Notes in Computer Science, vol 7791.
12// Springer, Berlin, Heidelberg
13//
14// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
15//
16
17#include <span>
18#include <variant>
19#include <vector>
20
21#include <boost/container/flat_map.hpp>
22#include <boost/container/flat_set.hpp>
23
24#include "shader_recompiler/frontend/ir/basic_block.h"
25#include "shader_recompiler/frontend/ir/opcodes.h"
26#include "shader_recompiler/frontend/ir/pred.h"
27#include "shader_recompiler/frontend/ir/reg.h"
28#include "shader_recompiler/frontend/ir/value.h"
29#include "shader_recompiler/ir_opt/passes.h"
30
31namespace Shader::Optimization {
32namespace {
33struct FlagTag {
34 auto operator<=>(const FlagTag&) const noexcept = default;
35};
36struct ZeroFlagTag : FlagTag {};
37struct SignFlagTag : FlagTag {};
38struct CarryFlagTag : FlagTag {};
39struct OverflowFlagTag : FlagTag {};
40
41struct GotoVariable : FlagTag {
42 GotoVariable() = default;
43 explicit GotoVariable(u32 index_) : index{index_} {}
44
45 auto operator<=>(const GotoVariable&) const noexcept = default;
46
47 u32 index;
48};
49
50struct IndirectBranchVariable {
51 auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
52};
53
54using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
55 OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
56using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
57
58struct DefTable {
59 const IR::Value& Def(IR::Block* block, IR::Reg variable) {
60 return block->SsaRegValue(variable);
61 }
62 void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
63 block->SetSsaRegValue(variable, value);
64 }
65
66 const IR::Value& Def(IR::Block* block, IR::Pred variable) {
67 return preds[IR::PredIndex(variable)][block];
68 }
69 void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
70 preds[IR::PredIndex(variable)].insert_or_assign(block, value);
71 }
72
73 const IR::Value& Def(IR::Block* block, GotoVariable variable) {
74 return goto_vars[variable.index][block];
75 }
76 void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
77 goto_vars[variable.index].insert_or_assign(block, value);
78 }
79
80 const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
81 return indirect_branch_var[block];
82 }
83 void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
84 indirect_branch_var.insert_or_assign(block, value);
85 }
86
87 const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
88 return zero_flag[block];
89 }
90 void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
91 zero_flag.insert_or_assign(block, value);
92 }
93
94 const IR::Value& Def(IR::Block* block, SignFlagTag) {
95 return sign_flag[block];
96 }
97 void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
98 sign_flag.insert_or_assign(block, value);
99 }
100
101 const IR::Value& Def(IR::Block* block, CarryFlagTag) {
102 return carry_flag[block];
103 }
104 void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
105 carry_flag.insert_or_assign(block, value);
106 }
107
108 const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
109 return overflow_flag[block];
110 }
111 void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
112 overflow_flag.insert_or_assign(block, value);
113 }
114
115 std::array<ValueMap, IR::NUM_USER_PREDS> preds;
116 boost::container::flat_map<u32, ValueMap> goto_vars;
117 ValueMap indirect_branch_var;
118 ValueMap zero_flag;
119 ValueMap sign_flag;
120 ValueMap carry_flag;
121 ValueMap overflow_flag;
122};
123
124IR::Opcode UndefOpcode(IR::Reg) noexcept {
125 return IR::Opcode::UndefU32;
126}
127
128IR::Opcode UndefOpcode(IR::Pred) noexcept {
129 return IR::Opcode::UndefU1;
130}
131
132IR::Opcode UndefOpcode(const FlagTag&) noexcept {
133 return IR::Opcode::UndefU1;
134}
135
136IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
137 return IR::Opcode::UndefU32;
138}
139
140enum class Status {
141 Start,
142 SetValue,
143 PreparePhiArgument,
144 PushPhiArgument,
145};
146
147template <typename Type>
148struct ReadState {
149 ReadState(IR::Block* block_) : block{block_} {}
150 ReadState() = default;
151
152 IR::Block* block{};
153 IR::Value result{};
154 IR::Inst* phi{};
155 IR::Block* const* pred_it{};
156 IR::Block* const* pred_end{};
157 Status pc{Status::Start};
158};
159
160class Pass {
161public:
162 template <typename Type>
163 void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
164 current_def.SetDef(block, variable, value);
165 }
166
167 template <typename Type>
168 IR::Value ReadVariable(Type variable, IR::Block* root_block) {
169 boost::container::small_vector<ReadState<Type>, 64> stack{
170 ReadState<Type>(nullptr),
171 ReadState<Type>(root_block),
172 };
173 const auto prepare_phi_operand{[&] {
174 if (stack.back().pred_it == stack.back().pred_end) {
175 IR::Inst* const phi{stack.back().phi};
176 IR::Block* const block{stack.back().block};
177 const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
178 stack.pop_back();
179 stack.back().result = result;
180 WriteVariable(variable, block, result);
181 } else {
182 IR::Block* const imm_pred{*stack.back().pred_it};
183 stack.back().pc = Status::PushPhiArgument;
184 stack.emplace_back(imm_pred);
185 }
186 }};
187 do {
188 IR::Block* const block{stack.back().block};
189 switch (stack.back().pc) {
190 case Status::Start: {
191 if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
192 stack.back().result = def;
193 } else if (!block->IsSsaSealed()) {
194 // Incomplete CFG
195 IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
196 phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
197
198 incomplete_phis[block].insert_or_assign(variable, phi);
199 stack.back().result = IR::Value{&*phi};
200 } else if (const std::span imm_preds = block->ImmPredecessors();
201 imm_preds.size() == 1) {
202 // Optimize the common case of one predecessor: no phi needed
203 stack.back().pc = Status::SetValue;
204 stack.emplace_back(imm_preds.front());
205 break;
206 } else {
207 // Break potential cycles with operandless phi
208 IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
209 phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
210
211 WriteVariable(variable, block, IR::Value{phi});
212
213 stack.back().phi = phi;
214 stack.back().pred_it = imm_preds.data();
215 stack.back().pred_end = imm_preds.data() + imm_preds.size();
216 prepare_phi_operand();
217 break;
218 }
219 }
220 [[fallthrough]];
221 case Status::SetValue: {
222 const IR::Value result{stack.back().result};
223 WriteVariable(variable, block, result);
224 stack.pop_back();
225 stack.back().result = result;
226 break;
227 }
228 case Status::PushPhiArgument: {
229 IR::Inst* const phi{stack.back().phi};
230 phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
231 ++stack.back().pred_it;
232 }
233 [[fallthrough]];
234 case Status::PreparePhiArgument:
235 prepare_phi_operand();
236 break;
237 }
238 } while (stack.size() > 1);
239 return stack.back().result;
240 }
241
242 void SealBlock(IR::Block* block) {
243 const auto it{incomplete_phis.find(block)};
244 if (it != incomplete_phis.end()) {
245 for (auto& pair : it->second) {
246 auto& variant{pair.first};
247 auto& phi{pair.second};
248 std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
249 }
250 }
251 block->SsaSeal();
252 }
253
254private:
255 template <typename Type>
256 IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
257 for (IR::Block* const imm_pred : block->ImmPredecessors()) {
258 phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
259 }
260 return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
261 }
262
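    // A phi is trivial when every operand resolves to the same value or to the phi itself,
    // e.g. (illustrative) x = phi(v, v, x) can simply be replaced by v.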
263 IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
264 IR::Value same;
265 const size_t num_args{phi.NumArgs()};
266 for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
267 const IR::Value& op{phi.Arg(arg_index)};
268 if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
269 // Unique value or self-reference
270 continue;
271 }
272 if (!same.IsEmpty()) {
273 // The phi merges at least two values: not trivial
274 return IR::Value{&phi};
275 }
276 same = op;
277 }
278 // Remove the phi node from the block; it will be reinserted
279 IR::Block::InstructionList& list{block->Instructions()};
280 list.erase(IR::Block::InstructionList::s_iterator_to(phi));
281
282 // Find the first non-phi instruction and use it as an insertion point
283 IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
284 if (same.IsEmpty()) {
285 // The phi is unreachable or in the start block
286 // Insert an undefined instruction and make it the phi node replacement
287 // The "phi" node reinsertion point is specified after this instruction
288 reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
289 same = IR::Value{&*reinsert_point};
290 ++reinsert_point;
291 }
292 // Reinsert the phi node and reroute all its uses to the "same" value
293 list.insert(reinsert_point, phi);
294 phi.ReplaceUsesWith(same);
295 // TODO: Try to recursively remove all phi users, which might have become trivial
296 return same;
297 }
298
299 boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
300 incomplete_phis;
301 DefTable current_def;
302};
303
304void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
305 switch (inst.GetOpcode()) {
306 case IR::Opcode::SetRegister:
307 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
308 pass.WriteVariable(reg, block, inst.Arg(1));
309 }
310 break;
311 case IR::Opcode::SetPred:
312 if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
313 pass.WriteVariable(pred, block, inst.Arg(1));
314 }
315 break;
316 case IR::Opcode::SetGotoVariable:
317 pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
318 break;
319 case IR::Opcode::SetIndirectBranchVariable:
320 pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
321 break;
322 case IR::Opcode::SetZFlag:
323 pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
324 break;
325 case IR::Opcode::SetSFlag:
326 pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
327 break;
328 case IR::Opcode::SetCFlag:
329 pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
330 break;
331 case IR::Opcode::SetOFlag:
332 pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
333 break;
334 case IR::Opcode::GetRegister:
335 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
336 inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
337 }
338 break;
339 case IR::Opcode::GetPred:
340 if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
341 inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
342 }
343 break;
344 case IR::Opcode::GetGotoVariable:
345 inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
346 break;
347 case IR::Opcode::GetIndirectBranchVariable:
348 inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
349 break;
350 case IR::Opcode::GetZFlag:
351 inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
352 break;
353 case IR::Opcode::GetSFlag:
354 inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
355 break;
356 case IR::Opcode::GetCFlag:
357 inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
358 break;
359 case IR::Opcode::GetOFlag:
360 inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
361 break;
362 default:
363 break;
364 }
365}
366
367void VisitBlock(Pass& pass, IR::Block* block) {
368 for (IR::Inst& inst : block->Instructions()) {
369 VisitInst(pass, block, inst);
370 }
371 pass.SealBlock(block);
372}
373} // Anonymous namespace
374
375void SsaRewritePass(IR::Program& program) {
376 Pass pass;
377 const auto end{program.post_order_blocks.rend()};
378 for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
379 VisitBlock(pass, *block);
380 }
381}
382
383} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..44ad10d43
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,523 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <bit>
7#include <optional>
8
9#include <boost/container/small_vector.hpp>
10
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/basic_block.h"
13#include "shader_recompiler/frontend/ir/breadth_first_search.h"
14#include "shader_recompiler/frontend/ir/ir_emitter.h"
15#include "shader_recompiler/ir_opt/passes.h"
16#include "shader_recompiler/shader_info.h"
17
18namespace Shader::Optimization {
19namespace {
20struct ConstBufferAddr {
21 u32 index;
22 u32 offset;
23 u32 secondary_index;
24 u32 secondary_offset;
25 IR::U32 dynamic_offset;
26 u32 count;
27 bool has_secondary;
28};
29
30struct TextureInst {
31 ConstBufferAddr cbuf;
32 IR::Inst* inst;
33 IR::Block* block;
34};
35
36using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
37
38constexpr u32 DESCRIPTOR_SIZE = 8;
39constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
40
41IR::Opcode IndexedInstruction(const IR::Inst& inst) {
42 switch (inst.GetOpcode()) {
43 case IR::Opcode::BindlessImageSampleImplicitLod:
44 case IR::Opcode::BoundImageSampleImplicitLod:
45 return IR::Opcode::ImageSampleImplicitLod;
46 case IR::Opcode::BoundImageSampleExplicitLod:
47 case IR::Opcode::BindlessImageSampleExplicitLod:
48 return IR::Opcode::ImageSampleExplicitLod;
49 case IR::Opcode::BoundImageSampleDrefImplicitLod:
50 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
51 return IR::Opcode::ImageSampleDrefImplicitLod;
52 case IR::Opcode::BoundImageSampleDrefExplicitLod:
53 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
54 return IR::Opcode::ImageSampleDrefExplicitLod;
55 case IR::Opcode::BindlessImageGather:
56 case IR::Opcode::BoundImageGather:
57 return IR::Opcode::ImageGather;
58 case IR::Opcode::BindlessImageGatherDref:
59 case IR::Opcode::BoundImageGatherDref:
60 return IR::Opcode::ImageGatherDref;
61 case IR::Opcode::BindlessImageFetch:
62 case IR::Opcode::BoundImageFetch:
63 return IR::Opcode::ImageFetch;
64 case IR::Opcode::BoundImageQueryDimensions:
65 case IR::Opcode::BindlessImageQueryDimensions:
66 return IR::Opcode::ImageQueryDimensions;
67 case IR::Opcode::BoundImageQueryLod:
68 case IR::Opcode::BindlessImageQueryLod:
69 return IR::Opcode::ImageQueryLod;
70 case IR::Opcode::BoundImageGradient:
71 case IR::Opcode::BindlessImageGradient:
72 return IR::Opcode::ImageGradient;
73 case IR::Opcode::BoundImageRead:
74 case IR::Opcode::BindlessImageRead:
75 return IR::Opcode::ImageRead;
76 case IR::Opcode::BoundImageWrite:
77 case IR::Opcode::BindlessImageWrite:
78 return IR::Opcode::ImageWrite;
79 case IR::Opcode::BoundImageAtomicIAdd32:
80 case IR::Opcode::BindlessImageAtomicIAdd32:
81 return IR::Opcode::ImageAtomicIAdd32;
82 case IR::Opcode::BoundImageAtomicSMin32:
83 case IR::Opcode::BindlessImageAtomicSMin32:
84 return IR::Opcode::ImageAtomicSMin32;
85 case IR::Opcode::BoundImageAtomicUMin32:
86 case IR::Opcode::BindlessImageAtomicUMin32:
87 return IR::Opcode::ImageAtomicUMin32;
88 case IR::Opcode::BoundImageAtomicSMax32:
89 case IR::Opcode::BindlessImageAtomicSMax32:
90 return IR::Opcode::ImageAtomicSMax32;
91 case IR::Opcode::BoundImageAtomicUMax32:
92 case IR::Opcode::BindlessImageAtomicUMax32:
93 return IR::Opcode::ImageAtomicUMax32;
94 case IR::Opcode::BoundImageAtomicInc32:
95 case IR::Opcode::BindlessImageAtomicInc32:
96 return IR::Opcode::ImageAtomicInc32;
97 case IR::Opcode::BoundImageAtomicDec32:
98 case IR::Opcode::BindlessImageAtomicDec32:
99 return IR::Opcode::ImageAtomicDec32;
100 case IR::Opcode::BoundImageAtomicAnd32:
101 case IR::Opcode::BindlessImageAtomicAnd32:
102 return IR::Opcode::ImageAtomicAnd32;
103 case IR::Opcode::BoundImageAtomicOr32:
104 case IR::Opcode::BindlessImageAtomicOr32:
105 return IR::Opcode::ImageAtomicOr32;
106 case IR::Opcode::BoundImageAtomicXor32:
107 case IR::Opcode::BindlessImageAtomicXor32:
108 return IR::Opcode::ImageAtomicXor32;
109 case IR::Opcode::BoundImageAtomicExchange32:
110 case IR::Opcode::BindlessImageAtomicExchange32:
111 return IR::Opcode::ImageAtomicExchange32;
112 default:
113 return IR::Opcode::Void;
114 }
115}
116
117bool IsBindless(const IR::Inst& inst) {
118 switch (inst.GetOpcode()) {
119 case IR::Opcode::BindlessImageSampleImplicitLod:
120 case IR::Opcode::BindlessImageSampleExplicitLod:
121 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
122 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
123 case IR::Opcode::BindlessImageGather:
124 case IR::Opcode::BindlessImageGatherDref:
125 case IR::Opcode::BindlessImageFetch:
126 case IR::Opcode::BindlessImageQueryDimensions:
127 case IR::Opcode::BindlessImageQueryLod:
128 case IR::Opcode::BindlessImageGradient:
129 case IR::Opcode::BindlessImageRead:
130 case IR::Opcode::BindlessImageWrite:
131 case IR::Opcode::BindlessImageAtomicIAdd32:
132 case IR::Opcode::BindlessImageAtomicSMin32:
133 case IR::Opcode::BindlessImageAtomicUMin32:
134 case IR::Opcode::BindlessImageAtomicSMax32:
135 case IR::Opcode::BindlessImageAtomicUMax32:
136 case IR::Opcode::BindlessImageAtomicInc32:
137 case IR::Opcode::BindlessImageAtomicDec32:
138 case IR::Opcode::BindlessImageAtomicAnd32:
139 case IR::Opcode::BindlessImageAtomicOr32:
140 case IR::Opcode::BindlessImageAtomicXor32:
141 case IR::Opcode::BindlessImageAtomicExchange32:
142 return true;
143 case IR::Opcode::BoundImageSampleImplicitLod:
144 case IR::Opcode::BoundImageSampleExplicitLod:
145 case IR::Opcode::BoundImageSampleDrefImplicitLod:
146 case IR::Opcode::BoundImageSampleDrefExplicitLod:
147 case IR::Opcode::BoundImageGather:
148 case IR::Opcode::BoundImageGatherDref:
149 case IR::Opcode::BoundImageFetch:
150 case IR::Opcode::BoundImageQueryDimensions:
151 case IR::Opcode::BoundImageQueryLod:
152 case IR::Opcode::BoundImageGradient:
153 case IR::Opcode::BoundImageRead:
154 case IR::Opcode::BoundImageWrite:
155 case IR::Opcode::BoundImageAtomicIAdd32:
156 case IR::Opcode::BoundImageAtomicSMin32:
157 case IR::Opcode::BoundImageAtomicUMin32:
158 case IR::Opcode::BoundImageAtomicSMax32:
159 case IR::Opcode::BoundImageAtomicUMax32:
160 case IR::Opcode::BoundImageAtomicInc32:
161 case IR::Opcode::BoundImageAtomicDec32:
162 case IR::Opcode::BoundImageAtomicAnd32:
163 case IR::Opcode::BoundImageAtomicOr32:
164 case IR::Opcode::BoundImageAtomicXor32:
165 case IR::Opcode::BoundImageAtomicExchange32:
166 return false;
167 default:
168 throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
169 }
170}
171
172bool IsTextureInstruction(const IR::Inst& inst) {
173 return IndexedInstruction(inst) != IR::Opcode::Void;
174}
175
176std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
177
178std::optional<ConstBufferAddr> Track(const IR::Value& value) {
179 return IR::BreadthFirstSearch(value, TryGetConstBuffer);
180}
181
182std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
183 switch (inst->GetOpcode()) {
184 default:
185 return std::nullopt;
186 case IR::Opcode::BitwiseOr32: {
187 std::optional lhs{Track(inst->Arg(0))};
188 std::optional rhs{Track(inst->Arg(1))};
189 if (!lhs || !rhs) {
190 return std::nullopt;
191 }
192 if (lhs->has_secondary || rhs->has_secondary) {
193 return std::nullopt;
194 }
195 if (lhs->count > 1 || rhs->count > 1) {
196 return std::nullopt;
197 }
198 if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
199 std::swap(lhs, rhs);
200 }
201 return ConstBufferAddr{
202 .index = lhs->index,
203 .offset = lhs->offset,
204 .secondary_index = rhs->index,
205 .secondary_offset = rhs->offset,
206 .dynamic_offset = {},
207 .count = 1,
208 .has_secondary = true,
209 };
210 }
211 case IR::Opcode::GetCbufU32x2:
212 case IR::Opcode::GetCbufU32:
213 break;
214 }
215 const IR::Value index{inst->Arg(0)};
216 const IR::Value offset{inst->Arg(1)};
217 if (!index.IsImmediate()) {
218 // Reading a bindless texture from variable indices is valid
219 // but not supported here at the moment
220 return std::nullopt;
221 }
222 if (offset.IsImmediate()) {
223 return ConstBufferAddr{
224 .index = index.U32(),
225 .offset = offset.U32(),
226 .secondary_index = 0,
227 .secondary_offset = 0,
228 .dynamic_offset = {},
229 .count = 1,
230 .has_secondary = false,
231 };
232 }
233 IR::Inst* const offset_inst{offset.InstRecursive()};
234 if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
235 return std::nullopt;
236 }
237 u32 base_offset{};
238 IR::U32 dynamic_offset;
239 if (offset_inst->Arg(0).IsImmediate()) {
240 base_offset = offset_inst->Arg(0).U32();
241 dynamic_offset = IR::U32{offset_inst->Arg(1)};
242 } else if (offset_inst->Arg(1).IsImmediate()) {
243 base_offset = offset_inst->Arg(1).U32();
244 dynamic_offset = IR::U32{offset_inst->Arg(0)};
245 } else {
246 return std::nullopt;
247 }
248 return ConstBufferAddr{
249 .index = index.U32(),
250 .offset = base_offset,
251 .secondary_index = 0,
252 .secondary_offset = 0,
253 .dynamic_offset = dynamic_offset,
254 .count = 8,
255 .has_secondary = false,
256 };
257}
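// Illustrative patterns accepted above (not exhaustive): a handle read directly as
// GetCbufU32(index, imm_offset); the same read where the offset is an IAdd32 of an
// immediate base and a dynamic term, treated as an indexed array of handles; or a
// BitwiseOr32 of two constant buffer reads, interpreted as separate texture and sampler
// handles.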
258
259TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
260 ConstBufferAddr addr;
261 if (IsBindless(inst)) {
262 const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
263 if (!track_addr) {
264 throw NotImplementedException("Failed to track bindless texture constant buffer");
265 }
266 addr = *track_addr;
267 } else {
268 addr = ConstBufferAddr{
269 .index = env.TextureBoundBuffer(),
270 .offset = inst.Arg(0).U32(),
271 .secondary_index = 0,
272 .secondary_offset = 0,
273 .dynamic_offset = {},
274 .count = 1,
275 .has_secondary = false,
276 };
277 }
278 return TextureInst{
279 .cbuf = addr,
280 .inst = &inst,
281 .block = block,
282 };
283}
284
285TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
286 const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
287 const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
288 const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
289 const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
290 return env.ReadTextureType(lhs_raw | rhs_raw);
291}
292
293class Descriptors {
294public:
295 explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
296 ImageBufferDescriptors& image_buffer_descriptors_,
297 TextureDescriptors& texture_descriptors_,
298 ImageDescriptors& image_descriptors_)
299 : texture_buffer_descriptors{texture_buffer_descriptors_},
300 image_buffer_descriptors{image_buffer_descriptors_},
301 texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {}
302
303 u32 Add(const TextureBufferDescriptor& desc) {
304 return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
305 return desc.cbuf_index == existing.cbuf_index &&
306 desc.cbuf_offset == existing.cbuf_offset &&
307 desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
308 desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
309 desc.count == existing.count && desc.size_shift == existing.size_shift &&
310 desc.has_secondary == existing.has_secondary;
311 });
312 }
313
314 u32 Add(const ImageBufferDescriptor& desc) {
315 const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
316 return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
317 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
318 desc.size_shift == existing.size_shift;
319 })};
320 image_buffer_descriptors[index].is_written |= desc.is_written;
321 image_buffer_descriptors[index].is_read |= desc.is_read;
322 return index;
323 }
324
325 u32 Add(const TextureDescriptor& desc) {
326 return Add(texture_descriptors, desc, [&desc](const auto& existing) {
327 return desc.type == existing.type && desc.is_depth == existing.is_depth &&
328 desc.has_secondary == existing.has_secondary &&
329 desc.cbuf_index == existing.cbuf_index &&
330 desc.cbuf_offset == existing.cbuf_offset &&
331 desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
332 desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
333 desc.count == existing.count && desc.size_shift == existing.size_shift;
334 });
335 }
336
337 u32 Add(const ImageDescriptor& desc) {
338 const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
339 return desc.type == existing.type && desc.format == existing.format &&
340 desc.cbuf_index == existing.cbuf_index &&
341 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
342 desc.size_shift == existing.size_shift;
343 })};
344 image_descriptors[index].is_written |= desc.is_written;
345 image_descriptors[index].is_read |= desc.is_read;
346 return index;
347 }
348
349private:
350 template <typename Descriptors, typename Descriptor, typename Func>
351 static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
352 // TODO: Handle arrays
353 const auto it{std::ranges::find_if(descriptors, pred)};
354 if (it != descriptors.end()) {
355 return static_cast<u32>(std::distance(descriptors.begin(), it));
356 }
357 descriptors.push_back(desc);
358 return static_cast<u32>(descriptors.size()) - 1;
359 }
360
361 TextureBufferDescriptors& texture_buffer_descriptors;
362 ImageBufferDescriptors& image_buffer_descriptors;
363 TextureDescriptors& texture_descriptors;
364 ImageDescriptors& image_descriptors;
365};
366} // Anonymous namespace
367
368void TexturePass(Environment& env, IR::Program& program) {
369 TextureInstVector to_replace;
370 for (IR::Block* const block : program.post_order_blocks) {
371 for (IR::Inst& inst : block->Instructions()) {
372 if (!IsTextureInstruction(inst)) {
373 continue;
374 }
375 to_replace.push_back(MakeInst(env, block, inst));
376 }
377 }
378 // Sort instructions to visit textures by constant buffer index, then by offset
379 std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
380 return lhs.cbuf.offset < rhs.cbuf.offset;
381 });
382 std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
383 return lhs.cbuf.index < rhs.cbuf.index;
384 });
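    // The stable_sort by cbuf index preserves the preceding offset order, so the combined
    // effect is an (index, offset) ordering of the instructions to replace.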
385 Descriptors descriptors{
386 program.info.texture_buffer_descriptors,
387 program.info.image_buffer_descriptors,
388 program.info.texture_descriptors,
389 program.info.image_descriptors,
390 };
391 for (TextureInst& texture_inst : to_replace) {
392 // TODO: Handle arrays
393 IR::Inst* const inst{texture_inst.inst};
394 inst->ReplaceOpcode(IndexedInstruction(*inst));
395
396 const auto& cbuf{texture_inst.cbuf};
397 auto flags{inst->Flags<IR::TextureInstInfo>()};
398 switch (inst->GetOpcode()) {
399 case IR::Opcode::ImageQueryDimensions:
400 flags.type.Assign(ReadTextureType(env, cbuf));
401 inst->SetFlags(flags);
402 break;
403 case IR::Opcode::ImageFetch:
404 if (flags.type != TextureType::Color1D) {
405 break;
406 }
407 if (ReadTextureType(env, cbuf) == TextureType::Buffer) {
408 // Replace with the bound texture type only when it's a texture buffer
409 // If the instruction is 1D and the bound type is 2D, don't change the code and let
410 // the rasterizer robustness handle it
411 // This happens on Fire Emblem: Three Houses
412 flags.type.Assign(TextureType::Buffer);
413 }
414 break;
415 default:
416 break;
417 }
418 u32 index;
419 switch (inst->GetOpcode()) {
420 case IR::Opcode::ImageRead:
421 case IR::Opcode::ImageAtomicIAdd32:
422 case IR::Opcode::ImageAtomicSMin32:
423 case IR::Opcode::ImageAtomicUMin32:
424 case IR::Opcode::ImageAtomicSMax32:
425 case IR::Opcode::ImageAtomicUMax32:
426 case IR::Opcode::ImageAtomicInc32:
427 case IR::Opcode::ImageAtomicDec32:
428 case IR::Opcode::ImageAtomicAnd32:
429 case IR::Opcode::ImageAtomicOr32:
430 case IR::Opcode::ImageAtomicXor32:
431 case IR::Opcode::ImageAtomicExchange32:
432 case IR::Opcode::ImageWrite: {
433 if (cbuf.has_secondary) {
434 throw NotImplementedException("Unexpected separate sampler");
435 }
436 const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
437 const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
438 if (flags.type == TextureType::Buffer) {
439 index = descriptors.Add(ImageBufferDescriptor{
440 .format = flags.image_format,
441 .is_written = is_written,
442 .is_read = is_read,
443 .cbuf_index = cbuf.index,
444 .cbuf_offset = cbuf.offset,
445 .count = cbuf.count,
446 .size_shift = DESCRIPTOR_SIZE_SHIFT,
447 });
448 } else {
449 index = descriptors.Add(ImageDescriptor{
450 .type = flags.type,
451 .format = flags.image_format,
452 .is_written = is_written,
453 .is_read = is_read,
454 .cbuf_index = cbuf.index,
455 .cbuf_offset = cbuf.offset,
456 .count = cbuf.count,
457 .size_shift = DESCRIPTOR_SIZE_SHIFT,
458 });
459 }
460 break;
461 }
462 default:
463 if (flags.type == TextureType::Buffer) {
464 index = descriptors.Add(TextureBufferDescriptor{
465 .has_secondary = cbuf.has_secondary,
466 .cbuf_index = cbuf.index,
467 .cbuf_offset = cbuf.offset,
468 .secondary_cbuf_index = cbuf.secondary_index,
469 .secondary_cbuf_offset = cbuf.secondary_offset,
470 .count = cbuf.count,
471 .size_shift = DESCRIPTOR_SIZE_SHIFT,
472 });
473 } else {
474 index = descriptors.Add(TextureDescriptor{
475 .type = flags.type,
476 .is_depth = flags.is_depth != 0,
477 .has_secondary = cbuf.has_secondary,
478 .cbuf_index = cbuf.index,
479 .cbuf_offset = cbuf.offset,
480 .secondary_cbuf_index = cbuf.secondary_index,
481 .secondary_cbuf_offset = cbuf.secondary_offset,
482 .count = cbuf.count,
483 .size_shift = DESCRIPTOR_SIZE_SHIFT,
484 });
485 }
486 break;
487 }
488 flags.descriptor_index.Assign(index);
489 inst->SetFlags(flags);
490
491 if (cbuf.count > 1) {
492 const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
493 IR::IREmitter ir{*texture_inst.block, insert_point};
494 const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
495 inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift));
496 } else {
497 inst->SetArg(0, IR::Value{});
498 }
499 }
500}
501
502void JoinTextureInfo(Info& base, Info& source) {
503 Descriptors descriptors{
504 base.texture_buffer_descriptors,
505 base.image_buffer_descriptors,
506 base.texture_descriptors,
507 base.image_descriptors,
508 };
509 for (auto& desc : source.texture_buffer_descriptors) {
510 descriptors.Add(desc);
511 }
512 for (auto& desc : source.image_buffer_descriptors) {
513 descriptors.Add(desc);
514 }
515 for (auto& desc : source.texture_descriptors) {
516 descriptors.Add(desc);
517 }
518 for (auto& desc : source.image_descriptors) {
519 descriptors.Add(desc);
520 }
521}
522
523} // namespace Shader::Optimization
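
The Descriptors helper above is a plain find-or-append deduplication: identical bindings discovered by several instructions collapse onto one descriptor slot, whose index is then stored in the instruction flags. A minimal standalone sketch of the same pattern (the Desc type and AddDescriptor helper are hypothetical, not part of the diff):

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

struct Desc {
    std::uint32_t cbuf_index;
    std::uint32_t cbuf_offset;
};

// Return the slot of an existing matching descriptor, or append a new one.
std::uint32_t AddDescriptor(std::vector<Desc>& descriptors, const Desc& desc) {
    const auto it = std::ranges::find_if(descriptors, [&desc](const Desc& existing) {
        return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset;
    });
    if (it != descriptors.end()) {
        return static_cast<std::uint32_t>(std::distance(descriptors.begin(), it));
    }
    descriptors.push_back(desc);
    return static_cast<std::uint32_t>(descriptors.size() - 1);
}
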
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..975d5aadf
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,98 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <set>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/ir_opt/passes.h"
12
13namespace Shader::Optimization {
14
15static void ValidateTypes(const IR::Program& program) {
16 for (const auto& block : program.blocks) {
17 for (const IR::Inst& inst : *block) {
18 if (inst.GetOpcode() == IR::Opcode::Phi) {
19 // Skip validation on phi nodes
20 continue;
21 }
22 const size_t num_args{inst.NumArgs()};
23 for (size_t i = 0; i < num_args; ++i) {
24 const IR::Type t1{inst.Arg(i).Type()};
25 const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
26 if (!IR::AreTypesCompatible(t1, t2)) {
27 throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
28 }
29 }
30 }
31 }
32}
33
34static void ValidateUses(const IR::Program& program) {
35 std::map<IR::Inst*, int> actual_uses;
36 for (const auto& block : program.blocks) {
37 for (const IR::Inst& inst : *block) {
38 const size_t num_args{inst.NumArgs()};
39 for (size_t i = 0; i < num_args; ++i) {
40 const IR::Value arg{inst.Arg(i)};
41 if (!arg.IsImmediate()) {
42 ++actual_uses[arg.Inst()];
43 }
44 }
45 }
46 }
47 for (const auto [inst, uses] : actual_uses) {
48 if (inst->UseCount() != uses) {
49 throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
50 }
51 }
52}
53
54static void ValidateForwardDeclarations(const IR::Program& program) {
55 std::set<const IR::Inst*> definitions;
56 for (const IR::Block* const block : program.blocks) {
57 for (const IR::Inst& inst : *block) {
58 definitions.emplace(&inst);
59 if (inst.GetOpcode() == IR::Opcode::Phi) {
60 // Phi nodes can have forward declarations
61 continue;
62 }
63 const size_t num_args{inst.NumArgs()};
64 for (size_t arg = 0; arg < num_args; ++arg) {
65 if (inst.Arg(arg).IsImmediate()) {
66 continue;
67 }
68 if (!definitions.contains(inst.Arg(arg).Inst())) {
69 throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
70 }
71 }
72 }
73 }
74}
75
76static void ValidatePhiNodes(const IR::Program& program) {
77 for (const IR::Block* const block : program.blocks) {
78 bool no_more_phis{false};
79 for (const IR::Inst& inst : *block) {
80 if (inst.GetOpcode() == IR::Opcode::Phi) {
81 if (no_more_phis) {
82 throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
83 }
84 } else {
85 no_more_phis = true;
86 }
87 }
88 }
89}
90
91void VerificationPass(const IR::Program& program) {
92 ValidateTypes(program);
93 ValidateUses(program);
94 ValidateForwardDeclarations(program);
95 ValidatePhiNodes(program);
96}
97
98} // namespace Shader::Optimization
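
ValidatePhiNodes above enforces that phi instructions form a contiguous prefix of each block. The same rule reduced to a plain vector of opcodes, as an illustrative sketch (Op and CheckPhisLeading are hypothetical):

#include <stdexcept>
#include <vector>

enum class Op { Phi, Add, Branch };

void CheckPhisLeading(const std::vector<Op>& block) {
    bool no_more_phis = false;
    for (const Op op : block) {
        if (op == Op::Phi) {
            if (no_more_phis) {
                throw std::logic_error("interleaved phi node");
            }
        } else {
            no_more_phis = true;
        }
    }
}

// CheckPhisLeading({Op::Phi, Op::Phi, Op::Add}) passes;
// CheckPhisLeading({Op::Phi, Op::Add, Op::Phi}) throws.
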
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
new file mode 100644
index 000000000..f8b255b66
--- /dev/null
+++ b/src/shader_recompiler/object_pool.h
@@ -0,0 +1,104 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <type_traits>
9#include <utility>
10
11namespace Shader {
12
13template <typename T>
14requires std::is_destructible_v<T> class ObjectPool {
15public:
16 explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
17 node = &chunks.emplace_back(new_chunk_size);
18 }
19
20 template <typename... Args>
21 requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) {
22 return std::construct_at(Memory(), std::forward<Args>(args)...);
23 }
24
25 void ReleaseContents() {
26 if (chunks.empty()) {
27 return;
28 }
29 Chunk& root{chunks.front()};
30 if (root.used_objects == root.num_objects) {
31 // Root chunk has been filled, squash allocations into it
32 const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
33 chunks.clear();
34 chunks.emplace_back(total_objects);
35 } else {
36 root.Release();
37 chunks.resize(1);
38 }
39 chunks.shrink_to_fit();
40 node = &chunks.front();
41 }
42
43private:
44 struct NonTrivialDummy {
45 NonTrivialDummy() noexcept {}
46 };
47
48 union Storage {
49 Storage() noexcept {}
50 ~Storage() noexcept {}
51
52 NonTrivialDummy dummy{};
53 T object;
54 };
55
56 struct Chunk {
57 explicit Chunk() = default;
58 explicit Chunk(size_t size)
59 : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {}
60
61 Chunk& operator=(Chunk&& rhs) noexcept {
62 Release();
63 used_objects = std::exchange(rhs.used_objects, 0);
64 num_objects = std::exchange(rhs.num_objects, 0);
65 storage = std::move(rhs.storage);
66            return *this;
67        }
67
68 Chunk(Chunk&& rhs) noexcept
69 : used_objects{std::exchange(rhs.used_objects, 0)},
70 num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {}
71
72 ~Chunk() {
73 Release();
74 }
75
76 void Release() {
77 std::destroy_n(storage.get(), used_objects);
78 used_objects = 0;
79 }
80
81 size_t used_objects{};
82 size_t num_objects{};
83 std::unique_ptr<Storage[]> storage;
84 };
85
86 [[nodiscard]] T* Memory() {
87 Chunk* const chunk{FreeChunk()};
88 return &chunk->storage[chunk->used_objects++].object;
89 }
90
91 [[nodiscard]] Chunk* FreeChunk() {
92 if (node->used_objects != node->num_objects) {
93 return node;
94 }
95 node = &chunks.emplace_back(new_chunk_size);
96 return node;
97 }
98
99 Chunk* node{};
100 std::vector<Chunk> chunks;
101 size_t new_chunk_size{};
102};
103
104} // namespace Shader
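
A short usage sketch for the pool above (not part of the diff; the Node type is hypothetical): objects are created in bulk during one compilation and released all at once afterwards, so the backing chunks can be reused.

#include "shader_recompiler/object_pool.h"

struct Node {
    int opcode{};
    Node* next{};
};

void CompileOnce() {
    Shader::ObjectPool<Node> pool{1024};  // allocate backing storage in chunks of 1024 objects
    Node* first = pool.Create();          // default-constructed node
    Node* second = pool.Create(Node{.opcode = 7, .next = first});
    (void)second;
    // Destroys every created object and keeps (or coalesces) the backing storage,
    // so the next compilation reuses the same memory.
    pool.ReleaseContents();
}
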
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
new file mode 100644
index 000000000..f0c3b3b17
--- /dev/null
+++ b/src/shader_recompiler/profile.h
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader {
10
11struct Profile {
12 u32 supported_spirv{0x00010000};
13
14 bool unified_descriptor_binding{};
15 bool support_descriptor_aliasing{};
16 bool support_int8{};
17 bool support_int16{};
18 bool support_int64{};
19 bool support_vertex_instance_id{};
20 bool support_float_controls{};
21 bool support_separate_denorm_behavior{};
22 bool support_separate_rounding_mode{};
23 bool support_fp16_denorm_preserve{};
24 bool support_fp32_denorm_preserve{};
25 bool support_fp16_denorm_flush{};
26 bool support_fp32_denorm_flush{};
27 bool support_fp16_signed_zero_nan_preserve{};
28 bool support_fp32_signed_zero_nan_preserve{};
29 bool support_fp64_signed_zero_nan_preserve{};
30 bool support_explicit_workgroup_layout{};
31 bool support_vote{};
32 bool support_viewport_index_layer_non_geometry{};
33 bool support_viewport_mask{};
34 bool support_typeless_image_loads{};
35 bool support_demote_to_helper_invocation{};
36 bool support_int64_atomics{};
37 bool support_derivative_control{};
38 bool support_geometry_shader_passthrough{};
39 bool support_gl_nv_gpu_shader_5{};
40 bool support_gl_amd_gpu_shader_half_float{};
41 bool support_gl_texture_shadow_lod{};
42 bool support_gl_warp_intrinsics{};
43 bool support_gl_variable_aoffi{};
44 bool support_gl_sparse_textures{};
45 bool support_gl_derivative_control{};
46
47 bool warp_size_potentially_larger_than_guest{};
48
49 bool lower_left_origin_mode{};
50    /// Fragment outputs have to be declared even if they are not written, to avoid undefined values.
51 /// See Ori and the Blind Forest's main menu for reference.
52 bool need_declared_frag_colors{};
53 /// Prevents fast math optimizations that may cause inaccuracies
54 bool need_fastmath_off{};
55
56 /// OpFClamp is broken and OpFMax + OpFMin should be used instead
57 bool has_broken_spirv_clamp{};
58 /// Offset image operands with an unsigned type do not work
59 bool has_broken_unsigned_image_offsets{};
60 /// Signed instructions with unsigned data types are misinterpreted
61 bool has_broken_signed_operations{};
62 /// Float controls break when fp16 is enabled
63 bool has_broken_fp16_float_controls{};
64 /// Dynamic vec4 indexing is broken on some OpenGL drivers
65 bool has_gl_component_indexing_bug{};
66 /// The precise type qualifier is broken in the fragment stage of some drivers
67 bool has_gl_precise_bug{};
68 /// Ignores SPIR-V ordered vs unordered using GLSL semantics
69 bool ignore_nan_fp_comparisons{};
70
71 u32 gl_max_compute_smem_size{};
72};
73
74} // namespace Shader
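
Most of these booleans gate either an optional device feature or a driver workaround. As an illustrative sketch only (plain C++ stand-in, not the real emitter), this is the shape of decision a backend makes from a workaround flag such as has_broken_spirv_clamp:

#include <algorithm>

struct ProfileLike {
    bool has_broken_spirv_clamp{};
};

float LowerClamp(const ProfileLike& profile, float value, float lo, float hi) {
    if (profile.has_broken_spirv_clamp) {
        // The driver's clamp instruction is unreliable: lower to max followed by min.
        return std::min(std::max(value, lo), hi);
    }
    return std::clamp(value, lo, hi);
}
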
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h
new file mode 100644
index 000000000..bd6c2bfb5
--- /dev/null
+++ b/src/shader_recompiler/program_header.h
@@ -0,0 +1,219 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Shader {
15
16enum class OutputTopology : u32 {
17 PointList = 1,
18 LineStrip = 6,
19 TriangleStrip = 7,
20};
21
22enum class PixelImap : u8 {
23 Unused = 0,
24 Constant = 1,
25 Perspective = 2,
26 ScreenLinear = 3,
27};
28
29// Documentation in:
30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
31struct ProgramHeader {
32 union {
33 BitField<0, 5, u32> sph_type;
34 BitField<5, 5, u32> version;
35 BitField<10, 4, u32> shader_type;
36 BitField<14, 1, u32> mrt_enable;
37 BitField<15, 1, u32> kills_pixels;
38 BitField<16, 1, u32> does_global_store;
39 BitField<17, 4, u32> sass_version;
40 BitField<21, 2, u32> reserved1;
41 BitField<24, 1, u32> geometry_passthrough;
42 BitField<25, 1, u32> reserved2;
43 BitField<26, 1, u32> does_load_or_store;
44 BitField<27, 1, u32> does_fp64;
45 BitField<28, 4, u32> stream_out_mask;
46 } common0;
47
48 union {
49 BitField<0, 24, u32> shader_local_memory_low_size;
50 BitField<24, 8, u32> per_patch_attribute_count;
51 } common1;
52
53 union {
54 BitField<0, 24, u32> shader_local_memory_high_size;
55 BitField<24, 8, u32> threads_per_input_primitive;
56 } common2;
57
58 union {
59 BitField<0, 24, u32> shader_local_memory_crs_size;
60 BitField<24, 4, OutputTopology> output_topology;
61 BitField<28, 4, u32> reserved;
62 } common3;
63
64 union {
65 BitField<0, 12, u32> max_output_vertices;
66 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
67 BitField<20, 4, u32> reserved;
68 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
69 } common4;
70
71 union {
72 struct {
73 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
74
75 union {
76 BitField<0, 1, u8> primitive_array_id;
77 BitField<1, 1, u8> rt_array_index;
78 BitField<2, 1, u8> viewport_index;
79 BitField<3, 1, u8> point_size;
80 BitField<4, 1, u8> position_x;
81 BitField<5, 1, u8> position_y;
82 BitField<6, 1, u8> position_z;
83 BitField<7, 1, u8> position_w;
84 u8 raw;
85 } imap_systemb;
86
87 std::array<u8, 16> imap_generic_vector;
88
89 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
90 union {
91 BitField<0, 8, u16> clip_distances;
92 BitField<8, 1, u16> point_sprite_s;
93 BitField<9, 1, u16> point_sprite_t;
94 BitField<10, 1, u16> fog_coordinate;
95 BitField<12, 1, u16> tessellation_eval_point_u;
96 BitField<13, 1, u16> tessellation_eval_point_v;
97 BitField<14, 1, u16> instance_id;
98 BitField<15, 1, u16> vertex_id;
99 };
100 INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
101 INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
102 INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
103
104 union {
105 BitField<0, 1, u8> primitive_array_id;
106 BitField<1, 1, u8> rt_array_index;
107 BitField<2, 1, u8> viewport_index;
108 BitField<3, 1, u8> point_size;
109 BitField<4, 1, u8> position_x;
110 BitField<5, 1, u8> position_y;
111 BitField<6, 1, u8> position_z;
112 BitField<7, 1, u8> position_w;
113 u8 raw;
114 } omap_systemb;
115
116 std::array<u8, 16> omap_generic_vector;
117
118 INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
119
120 union {
121 BitField<0, 8, u16> clip_distances;
122 BitField<8, 1, u16> point_sprite_s;
123 BitField<9, 1, u16> point_sprite_t;
124 BitField<10, 1, u16> fog_coordinate;
125 BitField<12, 1, u16> tessellation_eval_point_u;
126 BitField<13, 1, u16> tessellation_eval_point_v;
127 BitField<14, 1, u16> instance_id;
128 BitField<15, 1, u16> vertex_id;
129 } omap_systemc;
130
131 INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
132 INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
133
134 [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept {
135 const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)};
136 return {
137 (data & 1) != 0,
138 (data & 2) != 0,
139 (data & 4) != 0,
140 (data & 8) != 0,
141 };
142 }
143
144 [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept {
145 const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)};
146 return {
147 (data & 1) != 0,
148 (data & 2) != 0,
149 (data & 4) != 0,
150 (data & 8) != 0,
151 };
152 }
153 } vtg;
154
155 struct {
156 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
157
158 union {
159 BitField<0, 1, u8> primitive_array_id;
160 BitField<1, 1, u8> rt_array_index;
161 BitField<2, 1, u8> viewport_index;
162 BitField<3, 1, u8> point_size;
163 BitField<4, 1, u8> position_x;
164 BitField<5, 1, u8> position_y;
165 BitField<6, 1, u8> position_z;
166 BitField<7, 1, u8> position_w;
167 BitField<0, 4, u8> first;
168 BitField<4, 4, u8> position;
169 u8 raw;
170 } imap_systemb;
171
172 union {
173 BitField<0, 2, PixelImap> x;
174 BitField<2, 2, PixelImap> y;
175 BitField<4, 2, PixelImap> z;
176 BitField<6, 2, PixelImap> w;
177 u8 raw;
178 } imap_generic_vector[32];
179
180 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
181 INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
182 INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
183 INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
184
185 struct {
186 u32 target;
187 union {
188 BitField<0, 1, u32> sample_mask;
189 BitField<1, 1, u32> depth;
190 BitField<2, 30, u32> reserved;
191 };
192 } omap;
193
194 [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
195 const u32 bits{omap.target >> (rt * 4)};
196 return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
197 }
198
199 [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
200 const auto& vector{imap_generic_vector[attribute]};
201 return {vector.x, vector.y, vector.z, vector.w};
202 }
203
204 [[nodiscard]] bool IsGenericVectorActive(size_t index) const {
205 return imap_generic_vector[index].raw != 0;
206 }
207 } ps;
208
209 std::array<u32, 0xf> raw;
210 };
211
212 [[nodiscard]] u64 LocalMemorySize() const noexcept {
213 return static_cast<u64>(common1.shader_local_memory_low_size) |
214 (static_cast<u64>(common2.shader_local_memory_high_size) << 24);
215 }
216};
217static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
218
219} // namespace Shader
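
InputGeneric and OutputGeneric above unpack a 4-bit per-attribute mask, packed two attributes per byte. The same unpacking as a standalone sketch (hypothetical helper, not part of the header):

#include <array>
#include <cstddef>
#include <cstdint>

std::array<bool, 4> UnpackGenericMask(const std::array<std::uint8_t, 16>& map, std::size_t index) {
    // Even indices use the low nibble, odd indices the high nibble of each byte.
    const int data = map[index >> 1] >> ((index % 2) * 4);
    return {
        (data & 1) != 0,  // x component enabled
        (data & 2) != 0,  // y
        (data & 4) != 0,  // z
        (data & 8) != 0,  // w
    };
}
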
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
new file mode 100644
index 000000000..f3f83a258
--- /dev/null
+++ b/src/shader_recompiler/runtime_info.h
@@ -0,0 +1,88 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <optional>
10#include <vector>
11
12#include "common/common_types.h"
13#include "shader_recompiler/varying_state.h"
14
15namespace Shader {
16
17enum class AttributeType : u8 {
18 Float,
19 SignedInt,
20 UnsignedInt,
21 Disabled,
22};
23
24enum class InputTopology {
25 Points,
26 Lines,
27 LinesAdjacency,
28 Triangles,
29 TrianglesAdjacency,
30};
31
32enum class CompareFunction {
33 Never,
34 Less,
35 Equal,
36 LessThanEqual,
37 Greater,
38 NotEqual,
39 GreaterThanEqual,
40 Always,
41};
42
43enum class TessPrimitive {
44 Isolines,
45 Triangles,
46 Quads,
47};
48
49enum class TessSpacing {
50 Equal,
51 FractionalOdd,
52 FractionalEven,
53};
54
55struct TransformFeedbackVarying {
56 u32 buffer{};
57 u32 stride{};
58 u32 offset{};
59 u32 components{};
60};
61
62struct RuntimeInfo {
63 std::array<AttributeType, 32> generic_input_types{};
64 VaryingState previous_stage_stores;
65
66 bool convert_depth_mode{};
67 bool force_early_z{};
68
69 TessPrimitive tess_primitive{};
70 TessSpacing tess_spacing{};
71 bool tess_clockwise{};
72
73 InputTopology input_topology{};
74
75 std::optional<float> fixed_state_point_size;
76 std::optional<CompareFunction> alpha_test_func;
77 float alpha_test_reference{};
78
79 /// Static Y negate value
80 bool y_negate{};
81 /// Use storage buffers instead of global pointers on GLASM
82 bool glasm_use_storage_buffers{};
83
84 /// Transform feedback state for each varying
85 std::vector<TransformFeedbackVarying> xfb_varyings;
86};
87
88} // namespace Shader
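
alpha_test_func and alpha_test_reference describe a fixed-function alpha test that gets applied in the fragment stage. A reduced sketch of the comparison they encode (plain C++, hypothetical helper, not the actual emitter code):

#include <optional>

enum class Compare { Never, Less, Equal, LessThanEqual, Greater, NotEqual, GreaterThanEqual, Always };

bool PassesAlphaTest(std::optional<Compare> func, float alpha, float reference) {
    if (!func) {
        return true;  // no alpha test requested by the runtime state
    }
    switch (*func) {
    case Compare::Never:            return false;
    case Compare::Less:             return alpha < reference;
    case Compare::Equal:            return alpha == reference;
    case Compare::LessThanEqual:    return alpha <= reference;
    case Compare::Greater:          return alpha > reference;
    case Compare::NotEqual:         return alpha != reference;
    case Compare::GreaterThanEqual: return alpha >= reference;
    case Compare::Always:           return true;
    }
    return true;
}
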
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
new file mode 100644
index 000000000..4ef4dbd40
--- /dev/null
+++ b/src/shader_recompiler/shader_info.h
@@ -0,0 +1,193 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9
10#include "common/common_types.h"
11#include "shader_recompiler/frontend/ir/type.h"
12#include "shader_recompiler/varying_state.h"
13
14#include <boost/container/small_vector.hpp>
15#include <boost/container/static_vector.hpp>
16
17namespace Shader {
18
19enum class TextureType : u32 {
20 Color1D,
21 ColorArray1D,
22 Color2D,
23 ColorArray2D,
24 Color3D,
25 ColorCube,
26 ColorArrayCube,
27 Buffer,
28};
29constexpr u32 NUM_TEXTURE_TYPES = 8;
30
31enum class ImageFormat : u32 {
32 Typeless,
33 R8_UINT,
34 R8_SINT,
35 R16_UINT,
36 R16_SINT,
37 R32_UINT,
38 R32G32_UINT,
39 R32G32B32A32_UINT,
40};
41
42enum class Interpolation {
43 Smooth,
44 Flat,
45 NoPerspective,
46};
47
48struct ConstantBufferDescriptor {
49 u32 index;
50 u32 count;
51};
52
53struct StorageBufferDescriptor {
54 u32 cbuf_index;
55 u32 cbuf_offset;
56 u32 count;
57 bool is_written;
58};
59
60struct TextureBufferDescriptor {
61 bool has_secondary;
62 u32 cbuf_index;
63 u32 cbuf_offset;
64 u32 secondary_cbuf_index;
65 u32 secondary_cbuf_offset;
66 u32 count;
67 u32 size_shift;
68};
69using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
70
71struct ImageBufferDescriptor {
72 ImageFormat format;
73 bool is_written;
74 bool is_read;
75 u32 cbuf_index;
76 u32 cbuf_offset;
77 u32 count;
78 u32 size_shift;
79};
80using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
81
82struct TextureDescriptor {
83 TextureType type;
84 bool is_depth;
85 bool has_secondary;
86 u32 cbuf_index;
87 u32 cbuf_offset;
88 u32 secondary_cbuf_index;
89 u32 secondary_cbuf_offset;
90 u32 count;
91 u32 size_shift;
92};
93using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
94
95struct ImageDescriptor {
96 TextureType type;
97 ImageFormat format;
98 bool is_written;
99 bool is_read;
100 u32 cbuf_index;
101 u32 cbuf_offset;
102 u32 count;
103 u32 size_shift;
104};
105using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
106
107struct Info {
108 static constexpr size_t MAX_CBUFS{18};
109 static constexpr size_t MAX_SSBOS{32};
110
111 bool uses_workgroup_id{};
112 bool uses_local_invocation_id{};
113 bool uses_invocation_id{};
114 bool uses_sample_id{};
115 bool uses_is_helper_invocation{};
116 bool uses_subgroup_invocation_id{};
117 bool uses_subgroup_shuffles{};
118 std::array<bool, 30> uses_patches{};
119
120 std::array<Interpolation, 32> interpolation{};
121 VaryingState loads;
122 VaryingState stores;
123 VaryingState passthrough;
124
125 bool loads_indexed_attributes{};
126
127 std::array<bool, 8> stores_frag_color{};
128 bool stores_sample_mask{};
129 bool stores_frag_depth{};
130
131 bool stores_tess_level_outer{};
132 bool stores_tess_level_inner{};
133
134 bool stores_indexed_attributes{};
135
136 bool stores_global_memory{};
137
138 bool uses_fp16{};
139 bool uses_fp64{};
140 bool uses_fp16_denorms_flush{};
141 bool uses_fp16_denorms_preserve{};
142 bool uses_fp32_denorms_flush{};
143 bool uses_fp32_denorms_preserve{};
144 bool uses_int8{};
145 bool uses_int16{};
146 bool uses_int64{};
147 bool uses_image_1d{};
148 bool uses_sampled_1d{};
149 bool uses_sparse_residency{};
150 bool uses_demote_to_helper_invocation{};
151 bool uses_subgroup_vote{};
152 bool uses_subgroup_mask{};
153 bool uses_fswzadd{};
154 bool uses_derivatives{};
155 bool uses_typeless_image_reads{};
156 bool uses_typeless_image_writes{};
157 bool uses_image_buffers{};
158 bool uses_shared_increment{};
159 bool uses_shared_decrement{};
160 bool uses_global_increment{};
161 bool uses_global_decrement{};
162 bool uses_atomic_f32_add{};
163 bool uses_atomic_f16x2_add{};
164 bool uses_atomic_f16x2_min{};
165 bool uses_atomic_f16x2_max{};
166 bool uses_atomic_f32x2_add{};
167 bool uses_atomic_f32x2_min{};
168 bool uses_atomic_f32x2_max{};
169 bool uses_atomic_s32_min{};
170 bool uses_atomic_s32_max{};
171 bool uses_int64_bit_atomics{};
172 bool uses_global_memory{};
173 bool uses_atomic_image_u32{};
174 bool uses_shadow_lod{};
175
176 IR::Type used_constant_buffer_types{};
177 IR::Type used_storage_buffer_types{};
178
179 u32 constant_buffer_mask{};
180 std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
181 u32 nvn_buffer_base{};
182 std::bitset<16> nvn_buffer_used{};
183
184 boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
185 constant_buffer_descriptors;
186 boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
187 TextureBufferDescriptors texture_buffer_descriptors;
188 ImageBufferDescriptors image_buffer_descriptors;
189 TextureDescriptors texture_descriptors;
190 ImageDescriptors image_descriptors;
191};
192
193} // namespace Shader
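
One way a backend consumes these tables is to add up binding slots; texture and image descriptors may each stand for an array of `count` elements. A sketch of that bookkeeping (NumBindings is a hypothetical helper built on the declarations above, not part of the diff):

#include <cstddef>

#include "shader_recompiler/shader_info.h"

std::size_t NumBindings(const Shader::Info& info) {
    std::size_t total = info.constant_buffer_descriptors.size() +
                        info.storage_buffers_descriptors.size();
    // Texture and image descriptors can describe arrays of `count` elements each.
    for (const auto& desc : info.texture_buffer_descriptors) total += desc.count;
    for (const auto& desc : info.image_buffer_descriptors) total += desc.count;
    for (const auto& desc : info.texture_descriptors) total += desc.count;
    for (const auto& desc : info.image_descriptors) total += desc.count;
    return total;
}
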
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 000000000..5c1c8d8fc
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader {
10
11enum class Stage : u32 {
12 VertexB,
13 TessellationControl,
14 TessellationEval,
15 Geometry,
16 Fragment,
17
18 Compute,
19
20 VertexA,
21};
22constexpr u32 MaxStageTypes = 6;
23
24[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
25 return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index);
26}
27
28} // namespace Shader
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h
new file mode 100644
index 000000000..9d7b24a76
--- /dev/null
+++ b/src/shader_recompiler/varying_state.h
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <cstddef>
9
10#include "shader_recompiler/frontend/ir/attribute.h"
11
12namespace Shader {
13
14struct VaryingState {
15 std::bitset<256> mask{};
16
17 void Set(IR::Attribute attribute, bool state = true) {
18 mask[static_cast<size_t>(attribute)] = state;
19 }
20
21 [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept {
22 return mask[static_cast<size_t>(attribute)];
23 }
24
25 [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept {
26 return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] ||
27 mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3];
28 }
29
30 [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept {
31 return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] &&
32 mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3];
33 }
34
35 [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept {
36 return AnyComponent(base) == AllComponents(base);
37 }
38
39 [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept {
40 return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component];
41 }
42
43 [[nodiscard]] bool Generic(size_t index) const noexcept {
44 return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3);
45 }
46
47 [[nodiscard]] bool ClipDistances() const noexcept {
48 return AnyComponent(IR::Attribute::ClipDistance0) ||
49 AnyComponent(IR::Attribute::ClipDistance4);
50 }
51
52 [[nodiscard]] bool Legacy() const noexcept {
53 return AnyComponent(IR::Attribute::ColorFrontDiffuseR) ||
54 AnyComponent(IR::Attribute::ColorFrontSpecularR) ||
55 AnyComponent(IR::Attribute::ColorBackDiffuseR) ||
56 AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture();
57 }
58
59 [[nodiscard]] bool FixedFunctionTexture() const noexcept {
60 for (size_t index = 0; index < 10; ++index) {
61 if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
62 return true;
63 }
64 }
65 return false;
66 }
67};
68
69} // namespace Shader
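
A short usage sketch (hypothetical example, not part of the diff): marking two components of a generic attribute as stored and querying the aggregate helpers.

#include "shader_recompiler/varying_state.h"

void Example() {
    Shader::VaryingState stores;
    stores.Set(Shader::IR::Attribute::Generic0X);  // component X of generic attribute 0
    stores.Set(Shader::IR::Attribute::Generic0Y);

    const bool any = stores.Generic(0);                                       // true
    const bool all = stores.AllComponents(Shader::IR::Attribute::Generic0X);  // false, Z/W unset
    (void)any;
    (void)all;
}
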
diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp
index ac9912738..aa6e86593 100644
--- a/src/tests/common/unique_function.cpp
+++ b/src/tests/common/unique_function.cpp
@@ -17,10 +17,12 @@ struct Noisy {
17 Noisy& operator=(Noisy&& rhs) noexcept { 17 Noisy& operator=(Noisy&& rhs) noexcept {
18 state = "Move assigned"; 18 state = "Move assigned";
19 rhs.state = "Moved away"; 19 rhs.state = "Moved away";
20 return *this;
20 } 21 }
21 Noisy(const Noisy&) : state{"Copied constructed"} {} 22 Noisy(const Noisy&) : state{"Copied constructed"} {}
22 Noisy& operator=(const Noisy&) { 23 Noisy& operator=(const Noisy&) {
23 state = "Copied assigned"; 24 state = "Copied assigned";
25 return *this;
24 } 26 }
25 27
26 std::string state; 28 std::string state;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e4de55f4d..007ecc13e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library(video_core STATIC
29 dirty_flags.h 29 dirty_flags.h
30 dma_pusher.cpp 30 dma_pusher.cpp
31 dma_pusher.h 31 dma_pusher.h
32 engines/const_buffer_engine_interface.h
33 engines/const_buffer_info.h 32 engines/const_buffer_info.h
34 engines/engine_interface.h 33 engines/engine_interface.h
35 engines/engine_upload.cpp 34 engines/engine_upload.cpp
@@ -44,9 +43,6 @@ add_library(video_core STATIC
44 engines/maxwell_3d.h 43 engines/maxwell_3d.h
45 engines/maxwell_dma.cpp 44 engines/maxwell_dma.cpp
46 engines/maxwell_dma.h 45 engines/maxwell_dma.h
47 engines/shader_bytecode.h
48 engines/shader_header.h
49 engines/shader_type.h
50 framebuffer_config.h 46 framebuffer_config.h
51 macro/macro.cpp 47 macro/macro.cpp
52 macro/macro.h 48 macro/macro.h
@@ -61,8 +57,6 @@ add_library(video_core STATIC
61 gpu.h 57 gpu.h
62 gpu_thread.cpp 58 gpu_thread.cpp
63 gpu_thread.h 59 gpu_thread.h
64 guest_driver.cpp
65 guest_driver.h
66 memory_manager.cpp 60 memory_manager.cpp
67 memory_manager.h 61 memory_manager.h
68 query_cache.h 62 query_cache.h
@@ -71,26 +65,25 @@ add_library(video_core STATIC
71 rasterizer_interface.h 65 rasterizer_interface.h
72 renderer_base.cpp 66 renderer_base.cpp
73 renderer_base.h 67 renderer_base.h
74 renderer_opengl/gl_arb_decompiler.cpp
75 renderer_opengl/gl_arb_decompiler.h
76 renderer_opengl/gl_buffer_cache.cpp 68 renderer_opengl/gl_buffer_cache.cpp
77 renderer_opengl/gl_buffer_cache.h 69 renderer_opengl/gl_buffer_cache.h
70 renderer_opengl/gl_compute_pipeline.cpp
71 renderer_opengl/gl_compute_pipeline.h
78 renderer_opengl/gl_device.cpp 72 renderer_opengl/gl_device.cpp
79 renderer_opengl/gl_device.h 73 renderer_opengl/gl_device.h
80 renderer_opengl/gl_fence_manager.cpp 74 renderer_opengl/gl_fence_manager.cpp
81 renderer_opengl/gl_fence_manager.h 75 renderer_opengl/gl_fence_manager.h
76 renderer_opengl/gl_graphics_pipeline.cpp
77 renderer_opengl/gl_graphics_pipeline.h
82 renderer_opengl/gl_rasterizer.cpp 78 renderer_opengl/gl_rasterizer.cpp
83 renderer_opengl/gl_rasterizer.h 79 renderer_opengl/gl_rasterizer.h
84 renderer_opengl/gl_resource_manager.cpp 80 renderer_opengl/gl_resource_manager.cpp
85 renderer_opengl/gl_resource_manager.h 81 renderer_opengl/gl_resource_manager.h
86 renderer_opengl/gl_shader_cache.cpp 82 renderer_opengl/gl_shader_cache.cpp
87 renderer_opengl/gl_shader_cache.h 83 renderer_opengl/gl_shader_cache.h
88 renderer_opengl/gl_shader_decompiler.cpp
89 renderer_opengl/gl_shader_decompiler.h
90 renderer_opengl/gl_shader_disk_cache.cpp
91 renderer_opengl/gl_shader_disk_cache.h
92 renderer_opengl/gl_shader_manager.cpp 84 renderer_opengl/gl_shader_manager.cpp
93 renderer_opengl/gl_shader_manager.h 85 renderer_opengl/gl_shader_manager.h
86 renderer_opengl/gl_shader_context.h
94 renderer_opengl/gl_shader_util.cpp 87 renderer_opengl/gl_shader_util.cpp
95 renderer_opengl/gl_shader_util.h 88 renderer_opengl/gl_shader_util.h
96 renderer_opengl/gl_state_tracker.cpp 89 renderer_opengl/gl_state_tracker.cpp
@@ -112,6 +105,7 @@ add_library(video_core STATIC
112 renderer_vulkan/fixed_pipeline_state.h 105 renderer_vulkan/fixed_pipeline_state.h
113 renderer_vulkan/maxwell_to_vk.cpp 106 renderer_vulkan/maxwell_to_vk.cpp
114 renderer_vulkan/maxwell_to_vk.h 107 renderer_vulkan/maxwell_to_vk.h
108 renderer_vulkan/pipeline_helper.h
115 renderer_vulkan/renderer_vulkan.h 109 renderer_vulkan/renderer_vulkan.h
116 renderer_vulkan/renderer_vulkan.cpp 110 renderer_vulkan/renderer_vulkan.cpp
117 renderer_vulkan/vk_blit_screen.cpp 111 renderer_vulkan/vk_blit_screen.cpp
@@ -138,12 +132,12 @@ add_library(video_core STATIC
138 renderer_vulkan/vk_query_cache.h 132 renderer_vulkan/vk_query_cache.h
139 renderer_vulkan/vk_rasterizer.cpp 133 renderer_vulkan/vk_rasterizer.cpp
140 renderer_vulkan/vk_rasterizer.h 134 renderer_vulkan/vk_rasterizer.h
135 renderer_vulkan/vk_render_pass_cache.cpp
136 renderer_vulkan/vk_render_pass_cache.h
141 renderer_vulkan/vk_resource_pool.cpp 137 renderer_vulkan/vk_resource_pool.cpp
142 renderer_vulkan/vk_resource_pool.h 138 renderer_vulkan/vk_resource_pool.h
143 renderer_vulkan/vk_scheduler.cpp 139 renderer_vulkan/vk_scheduler.cpp
144 renderer_vulkan/vk_scheduler.h 140 renderer_vulkan/vk_scheduler.h
145 renderer_vulkan/vk_shader_decompiler.cpp
146 renderer_vulkan/vk_shader_decompiler.h
147 renderer_vulkan/vk_shader_util.cpp 141 renderer_vulkan/vk_shader_util.cpp
148 renderer_vulkan/vk_shader_util.h 142 renderer_vulkan/vk_shader_util.h
149 renderer_vulkan/vk_staging_buffer_pool.cpp 143 renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -156,60 +150,12 @@ add_library(video_core STATIC
156 renderer_vulkan/vk_texture_cache.h 150 renderer_vulkan/vk_texture_cache.h
157 renderer_vulkan/vk_update_descriptor.cpp 151 renderer_vulkan/vk_update_descriptor.cpp
158 renderer_vulkan/vk_update_descriptor.h 152 renderer_vulkan/vk_update_descriptor.h
153 shader_cache.cpp
159 shader_cache.h 154 shader_cache.h
155 shader_environment.cpp
156 shader_environment.h
160 shader_notify.cpp 157 shader_notify.cpp
161 shader_notify.h 158 shader_notify.h
162 shader/decode/arithmetic.cpp
163 shader/decode/arithmetic_immediate.cpp
164 shader/decode/bfe.cpp
165 shader/decode/bfi.cpp
166 shader/decode/shift.cpp
167 shader/decode/arithmetic_integer.cpp
168 shader/decode/arithmetic_integer_immediate.cpp
169 shader/decode/arithmetic_half.cpp
170 shader/decode/arithmetic_half_immediate.cpp
171 shader/decode/ffma.cpp
172 shader/decode/hfma2.cpp
173 shader/decode/conversion.cpp
174 shader/decode/memory.cpp
175 shader/decode/texture.cpp
176 shader/decode/image.cpp
177 shader/decode/float_set_predicate.cpp
178 shader/decode/integer_set_predicate.cpp
179 shader/decode/half_set_predicate.cpp
180 shader/decode/predicate_set_register.cpp
181 shader/decode/predicate_set_predicate.cpp
182 shader/decode/register_set_predicate.cpp
183 shader/decode/float_set.cpp
184 shader/decode/integer_set.cpp
185 shader/decode/half_set.cpp
186 shader/decode/video.cpp
187 shader/decode/warp.cpp
188 shader/decode/xmad.cpp
189 shader/decode/other.cpp
190 shader/ast.cpp
191 shader/ast.h
192 shader/async_shaders.cpp
193 shader/async_shaders.h
194 shader/compiler_settings.cpp
195 shader/compiler_settings.h
196 shader/control_flow.cpp
197 shader/control_flow.h
198 shader/decode.cpp
199 shader/expr.cpp
200 shader/expr.h
201 shader/memory_util.cpp
202 shader/memory_util.h
203 shader/node_helper.cpp
204 shader/node_helper.h
205 shader/node.h
206 shader/registry.cpp
207 shader/registry.h
208 shader/shader_ir.cpp
209 shader/shader_ir.h
210 shader/track.cpp
211 shader/transform_feedback.cpp
212 shader/transform_feedback.h
213 surface.cpp 159 surface.cpp
214 surface.h 160 surface.h
215 texture_cache/accelerated_swizzle.cpp 161 texture_cache/accelerated_swizzle.cpp
@@ -242,6 +188,8 @@ add_library(video_core STATIC
242 textures/decoders.h 188 textures/decoders.h
243 textures/texture.cpp 189 textures/texture.cpp
244 textures/texture.h 190 textures/texture.h
191 transform_feedback.cpp
192 transform_feedback.h
245 video_core.cpp 193 video_core.cpp
246 video_core.h 194 video_core.h
247 vulkan_common/vulkan_debug_callback.cpp 195 vulkan_common/vulkan_debug_callback.cpp
@@ -265,7 +213,7 @@ add_library(video_core STATIC
265create_target_directory_groups(video_core) 213create_target_directory_groups(video_core)
266 214
267target_link_libraries(video_core PUBLIC common core) 215target_link_libraries(video_core PUBLIC common core)
268target_link_libraries(video_core PRIVATE glad xbyak) 216target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
269 217
270if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) 218if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
271 add_dependencies(video_core ffmpeg-build) 219 add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 7373cb62d..24c858104 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -31,6 +31,7 @@
31#include "video_core/engines/maxwell_3d.h" 31#include "video_core/engines/maxwell_3d.h"
32#include "video_core/memory_manager.h" 32#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 33#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h"
34#include "video_core/texture_cache/slot_vector.h" 35#include "video_core/texture_cache/slot_vector.h"
35#include "video_core/texture_cache/types.h" 36#include "video_core/texture_cache/types.h"
36 37
@@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
42 43
43using BufferId = SlotId; 44using BufferId = SlotId;
44 45
46using VideoCore::Surface::PixelFormat;
47using namespace Common::Literals;
48
45constexpr u32 NUM_VERTEX_BUFFERS = 32; 49constexpr u32 NUM_VERTEX_BUFFERS = 32;
46constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; 50constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
47constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; 51constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
48constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; 52constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
49constexpr u32 NUM_STORAGE_BUFFERS = 16; 53constexpr u32 NUM_STORAGE_BUFFERS = 16;
54constexpr u32 NUM_TEXTURE_BUFFERS = 16;
50constexpr u32 NUM_STAGES = 5; 55constexpr u32 NUM_STAGES = 5;
51 56
52using namespace Common::Literals; 57using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
58using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
53 59
54template <typename P> 60template <typename P>
55class BufferCache { 61class BufferCache {
@@ -67,6 +73,7 @@ class BufferCache {
67 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; 73 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
68 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; 74 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
69 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; 75 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
76 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
70 77
71 static constexpr BufferId NULL_BUFFER_ID{0}; 78 static constexpr BufferId NULL_BUFFER_ID{0};
72 79
@@ -96,6 +103,10 @@ class BufferCache {
96 BufferId buffer_id; 103 BufferId buffer_id;
97 }; 104 };
98 105
106 struct TextureBufferBinding : Binding {
107 PixelFormat format;
108 };
109
99 static constexpr Binding NULL_BINDING{ 110 static constexpr Binding NULL_BINDING{
100 .cpu_addr = 0, 111 .cpu_addr = 0,
101 .size = 0, 112 .size = 0,
@@ -133,20 +144,31 @@ public:
133 144
134 void BindHostComputeBuffers(); 145 void BindHostComputeBuffers();
135 146
136 void SetEnabledUniformBuffers(size_t stage, u32 enabled); 147 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
148 const UniformBufferSizes* sizes);
137 149
138 void SetEnabledComputeUniformBuffers(u32 enabled); 150 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
139 151
140 void UnbindGraphicsStorageBuffers(size_t stage); 152 void UnbindGraphicsStorageBuffers(size_t stage);
141 153
142 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, 154 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
143 bool is_written); 155 bool is_written);
144 156
157 void UnbindGraphicsTextureBuffers(size_t stage);
158
159 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
160 PixelFormat format, bool is_written, bool is_image);
161
145 void UnbindComputeStorageBuffers(); 162 void UnbindComputeStorageBuffers();
146 163
147 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, 164 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
148 bool is_written); 165 bool is_written);
149 166
167 void UnbindComputeTextureBuffers();
168
169 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
170 bool is_written, bool is_image);
171
150 void FlushCachedWrites(); 172 void FlushCachedWrites();
151 173
152 /// Return true when there are uncommitted buffers to be downloaded 174 /// Return true when there are uncommitted buffers to be downloaded
@@ -178,6 +200,7 @@ public:
178 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 200 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
179 201
180 std::mutex mutex; 202 std::mutex mutex;
203 Runtime& runtime;
181 204
182private: 205private:
183 template <typename Func> 206 template <typename Func>
@@ -254,12 +277,16 @@ private:
254 277
255 void BindHostGraphicsStorageBuffers(size_t stage); 278 void BindHostGraphicsStorageBuffers(size_t stage);
256 279
280 void BindHostGraphicsTextureBuffers(size_t stage);
281
257 void BindHostTransformFeedbackBuffers(); 282 void BindHostTransformFeedbackBuffers();
258 283
259 void BindHostComputeUniformBuffers(); 284 void BindHostComputeUniformBuffers();
260 285
261 void BindHostComputeStorageBuffers(); 286 void BindHostComputeStorageBuffers();
262 287
288 void BindHostComputeTextureBuffers();
289
263 void DoUpdateGraphicsBuffers(bool is_indexed); 290 void DoUpdateGraphicsBuffers(bool is_indexed);
264 291
265 void DoUpdateComputeBuffers(); 292 void DoUpdateComputeBuffers();
@@ -274,6 +301,8 @@ private:
274 301
275 void UpdateStorageBuffers(size_t stage); 302 void UpdateStorageBuffers(size_t stage);
276 303
304 void UpdateTextureBuffers(size_t stage);
305
277 void UpdateTransformFeedbackBuffers(); 306 void UpdateTransformFeedbackBuffers();
278 307
279 void UpdateTransformFeedbackBuffer(u32 index); 308 void UpdateTransformFeedbackBuffer(u32 index);
@@ -282,6 +311,8 @@ private:
282 311
283 void UpdateComputeStorageBuffers(); 312 void UpdateComputeStorageBuffers();
284 313
314 void UpdateComputeTextureBuffers();
315
285 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); 316 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
286 317
287 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); 318 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
@@ -323,6 +354,9 @@ private:
323 354
324 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; 355 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
325 356
357 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
358 PixelFormat format);
359
326 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); 360 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
327 361
328 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); 362 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
@@ -336,7 +370,6 @@ private:
336 Tegra::Engines::KeplerCompute& kepler_compute; 370 Tegra::Engines::KeplerCompute& kepler_compute;
337 Tegra::MemoryManager& gpu_memory; 371 Tegra::MemoryManager& gpu_memory;
338 Core::Memory::Memory& cpu_memory; 372 Core::Memory::Memory& cpu_memory;
339 Runtime& runtime;
340 373
341 SlotVector<Buffer> slot_buffers; 374 SlotVector<Buffer> slot_buffers;
342 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 375 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -347,20 +380,30 @@ private:
347 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; 380 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
348 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; 381 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
349 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; 382 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
383 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
350 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; 384 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
351 385
352 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; 386 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
353 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; 387 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
388 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
354 389
355 std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; 390 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
356 u32 enabled_compute_uniform_buffers = 0; 391 u32 enabled_compute_uniform_buffer_mask = 0;
392
393 const UniformBufferSizes* uniform_buffer_sizes{};
394 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
357 395
358 std::array<u32, NUM_STAGES> enabled_storage_buffers{}; 396 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
359 std::array<u32, NUM_STAGES> written_storage_buffers{}; 397 std::array<u32, NUM_STAGES> written_storage_buffers{};
360 u32 enabled_compute_storage_buffers = 0; 398 u32 enabled_compute_storage_buffers = 0;
361 u32 written_compute_storage_buffers = 0; 399 u32 written_compute_storage_buffers = 0;
362 400
363 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; 401 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
402 std::array<u32, NUM_STAGES> written_texture_buffers{};
403 std::array<u32, NUM_STAGES> image_texture_buffers{};
404 u32 enabled_compute_texture_buffers = 0;
405 u32 written_compute_texture_buffers = 0;
406 u32 image_compute_texture_buffers = 0;
364 407
365 std::array<u32, 16> uniform_cache_hits{}; 408 std::array<u32, 16> uniform_cache_hits{};
366 std::array<u32, 16> uniform_cache_shots{}; 409 std::array<u32, 16> uniform_cache_shots{};
@@ -371,6 +414,10 @@ private:
371 414
372 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> 415 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
373 dirty_uniform_buffers{}; 416 dirty_uniform_buffers{};
417 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
418 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
419 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
420 uniform_buffer_binding_sizes{};
374 421
375 std::vector<BufferId> cached_write_buffer_ids; 422 std::vector<BufferId> cached_write_buffer_ids;
376 423
@@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
394 Tegra::Engines::KeplerCompute& kepler_compute_, 441 Tegra::Engines::KeplerCompute& kepler_compute_,
395 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 442 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
396 Runtime& runtime_) 443 Runtime& runtime_)
397 : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, 444 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
398 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { 445 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
399 // Ensure the first slot is used for the null buffer 446 // Ensure the first slot is used for the null buffer
400 void(slot_buffers.insert(runtime, NullBufferParams{})); 447 void(slot_buffers.insert(runtime, NullBufferParams{}));
401 deletion_iterator = slot_buffers.end(); 448 deletion_iterator = slot_buffers.end();
@@ -553,13 +600,9 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
553 ClearDownload(subtract_interval); 600 ClearDownload(subtract_interval);
554 common_ranges.subtract(subtract_interval); 601 common_ranges.subtract(subtract_interval);
555 602
556 BufferId buffer; 603 const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
557 do {
558 has_deleted_buffers = false;
559 buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
560 } while (has_deleted_buffers);
561 auto& dest_buffer = slot_buffers[buffer]; 604 auto& dest_buffer = slot_buffers[buffer];
562 const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); 605 const u32 offset = dest_buffer.Offset(*cpu_dst_address);
563 runtime.ClearBuffer(dest_buffer, offset, size, value); 606 runtime.ClearBuffer(dest_buffer, offset, size, value);
564 return true; 607 return true;
565} 608}
@@ -619,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) {
619 MICROPROFILE_SCOPE(GPU_BindUploadBuffers); 662 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
620 BindHostGraphicsUniformBuffers(stage); 663 BindHostGraphicsUniformBuffers(stage);
621 BindHostGraphicsStorageBuffers(stage); 664 BindHostGraphicsStorageBuffers(stage);
665 BindHostGraphicsTextureBuffers(stage);
622} 666}
623 667
624template <class P> 668template <class P>
@@ -626,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() {
626 MICROPROFILE_SCOPE(GPU_BindUploadBuffers); 670 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
627 BindHostComputeUniformBuffers(); 671 BindHostComputeUniformBuffers();
628 BindHostComputeStorageBuffers(); 672 BindHostComputeStorageBuffers();
673 BindHostComputeTextureBuffers();
629} 674}
630 675
631template <class P> 676template <class P>
632void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { 677void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
678 const UniformBufferSizes* sizes) {
633 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 679 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
634 if (enabled_uniform_buffers[stage] != enabled) { 680 if (enabled_uniform_buffer_masks != mask) {
635 dirty_uniform_buffers[stage] = ~u32{0}; 681 if constexpr (IS_OPENGL) {
682 fast_bound_uniform_buffers.fill(0);
683 }
684 dirty_uniform_buffers.fill(~u32{0});
685 uniform_buffer_binding_sizes.fill({});
636 } 686 }
637 } 687 }
638 enabled_uniform_buffers[stage] = enabled; 688 enabled_uniform_buffer_masks = mask;
689 uniform_buffer_sizes = sizes;
639} 690}
640 691
641template <class P> 692template <class P>
642void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { 693void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
643 enabled_compute_uniform_buffers = enabled; 694 const ComputeUniformBufferSizes* sizes) {
695 enabled_compute_uniform_buffer_mask = mask;
696 compute_uniform_buffer_sizes = sizes;
644} 697}
645 698
646template <class P> 699template <class P>
@@ -661,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
661} 714}
662 715
663template <class P> 716template <class P>
717void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
718 enabled_texture_buffers[stage] = 0;
719 written_texture_buffers[stage] = 0;
720 image_texture_buffers[stage] = 0;
721}
722
723template <class P>
724void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
725 u32 size, PixelFormat format, bool is_written,
726 bool is_image) {
727 enabled_texture_buffers[stage] |= 1U << tbo_index;
728 written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
729 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
730 image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
731 }
732 texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
733}
734
735template <class P>
664void BufferCache<P>::UnbindComputeStorageBuffers() { 736void BufferCache<P>::UnbindComputeStorageBuffers() {
665 enabled_compute_storage_buffers = 0; 737 enabled_compute_storage_buffers = 0;
666 written_compute_storage_buffers = 0; 738 written_compute_storage_buffers = 0;
739 image_compute_texture_buffers = 0;
667} 740}
668 741
669template <class P> 742template <class P>
@@ -681,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
681} 754}
682 755
683template <class P> 756template <class P>
757void BufferCache<P>::UnbindComputeTextureBuffers() {
758 enabled_compute_texture_buffers = 0;
759 written_compute_texture_buffers = 0;
760 image_compute_texture_buffers = 0;
761}
762
763template <class P>
764void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
765 PixelFormat format, bool is_written, bool is_image) {
766 enabled_compute_texture_buffers |= 1U << tbo_index;
767 written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
768 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
769 image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
770 }
771 compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
772}
773
774template <class P>
684void BufferCache<P>::FlushCachedWrites() { 775void BufferCache<P>::FlushCachedWrites() {
685 for (const BufferId buffer_id : cached_write_buffer_ids) { 776 for (const BufferId buffer_id : cached_write_buffer_ids) {
686 slot_buffers[buffer_id].FlushCachedWrites(); 777 slot_buffers[buffer_id].FlushCachedWrites();
@@ -905,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
905 dirty = std::exchange(dirty_uniform_buffers[stage], 0); 996 dirty = std::exchange(dirty_uniform_buffers[stage], 0);
906 } 997 }
907 u32 binding_index = 0; 998 u32 binding_index = 0;
908 ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { 999 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
909 const bool needs_bind = ((dirty >> index) & 1) != 0; 1000 const bool needs_bind = ((dirty >> index) & 1) != 0;
910 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); 1001 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
911 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1002 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -919,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
919 bool needs_bind) { 1010 bool needs_bind) {
920 const Binding& binding = uniform_buffers[stage][index]; 1011 const Binding& binding = uniform_buffers[stage][index];
921 const VAddr cpu_addr = binding.cpu_addr; 1012 const VAddr cpu_addr = binding.cpu_addr;
922 const u32 size = binding.size; 1013 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
923 Buffer& buffer = slot_buffers[binding.buffer_id]; 1014 Buffer& buffer = slot_buffers[binding.buffer_id];
924 TouchBuffer(buffer); 1015 TouchBuffer(buffer);
925 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 1016 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
@@ -929,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
929 if constexpr (IS_OPENGL) { 1020 if constexpr (IS_OPENGL) {
930 if (runtime.HasFastBufferSubData()) { 1021 if (runtime.HasFastBufferSubData()) {
931 // Fast path for Nvidia 1022 // Fast path for Nvidia
932 if (!HasFastUniformBufferBound(stage, binding_index)) {
1023 const bool should_fast_bind =
1024 !HasFastUniformBufferBound(stage, binding_index) ||
1025 uniform_buffer_binding_sizes[stage][binding_index] != size;
1026 if (should_fast_bind) {
933 // We only have to bind when the currently bound buffer is not the fast version 1027 // We only have to bind when the currently bound buffer is not the fast version
1028 fast_bound_uniform_buffers[stage] |= 1U << binding_index;
1029 uniform_buffer_binding_sizes[stage][binding_index] = size;
934 runtime.BindFastUniformBuffer(stage, binding_index, size); 1030 runtime.BindFastUniformBuffer(stage, binding_index, size);
935 } 1031 }
936 const auto span = ImmediateBufferWithData(cpu_addr, size); 1032 const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -938,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
938 return; 1034 return;
939 } 1035 }
940 } 1036 }
941 fast_bound_uniform_buffers[stage] |= 1U << binding_index;
942
1037 if constexpr (IS_OPENGL) {
1038 fast_bound_uniform_buffers[stage] |= 1U << binding_index;
1039 uniform_buffer_binding_sizes[stage][binding_index] = size;
1040 }
943 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan 1041 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
944 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); 1042 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
945 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 1043 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
@@ -952,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
952 } 1050 }
953 ++uniform_cache_shots[0]; 1051 ++uniform_cache_shots[0];
954 1052
955 if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
956 // Skip binding if it's not needed and if the bound buffer is not the fast version
957 // This exists to avoid instances where the fast buffer is bound and a GPU write happens
1053 // Skip binding if it's not needed and if the bound buffer is not the fast version
1054 // This exists to avoid instances where the fast buffer is bound and a GPU write happens
1055 needs_bind |= HasFastUniformBufferBound(stage, binding_index);
1056 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1057 needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
1058 }
1059 if (!needs_bind) {
958 return; 1060 return;
959 } 1061 }
960 fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
961
962 const u32 offset = buffer.Offset(cpu_addr); 1062 const u32 offset = buffer.Offset(cpu_addr);
1063 if constexpr (IS_OPENGL) {
1064 // Fast buffer will be unbound
1065 fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
1066
1067 // Mark the index as dirty if offset doesn't match
1068 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
1069 dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
1070 }
1071 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1072 uniform_buffer_binding_sizes[stage][binding_index] = size;
1073 }
963 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1074 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
964 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); 1075 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
965 } else { 1076 } else {
@@ -989,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
989} 1100}
990 1101
991template <class P> 1102template <class P>
1103void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
1104 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
1105 const TextureBufferBinding& binding = texture_buffers[stage][index];
1106 Buffer& buffer = slot_buffers[binding.buffer_id];
1107 const u32 size = binding.size;
1108 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1109
1110 const u32 offset = buffer.Offset(binding.cpu_addr);
1111 const PixelFormat format = binding.format;
1112 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1113 if (((image_texture_buffers[stage] >> index) & 1) != 0) {
1114 runtime.BindImageBuffer(buffer, offset, size, format);
1115 } else {
1116 runtime.BindTextureBuffer(buffer, offset, size, format);
1117 }
1118 } else {
1119 runtime.BindTextureBuffer(buffer, offset, size, format);
1120 }
1121 });
1122}
1123
1124template <class P>
992void BufferCache<P>::BindHostTransformFeedbackBuffers() { 1125void BufferCache<P>::BindHostTransformFeedbackBuffers() {
993 if (maxwell3d.regs.tfb_enabled == 0) { 1126 if (maxwell3d.regs.tfb_enabled == 0) {
994 return; 1127 return;
@@ -1010,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1010 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1143 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1011 // Mark all uniform buffers as dirty 1144 // Mark all uniform buffers as dirty
1012 dirty_uniform_buffers.fill(~u32{0}); 1145 dirty_uniform_buffers.fill(~u32{0});
1146 fast_bound_uniform_buffers.fill(0);
1013 } 1147 }
1014 u32 binding_index = 0; 1148 u32 binding_index = 0;
1015 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
1149 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
1016 const Binding& binding = compute_uniform_buffers[index]; 1150 const Binding& binding = compute_uniform_buffers[index];
1017 Buffer& buffer = slot_buffers[binding.buffer_id]; 1151 Buffer& buffer = slot_buffers[binding.buffer_id];
1018 TouchBuffer(buffer); 1152 TouchBuffer(buffer);
1019 const u32 size = binding.size;
1153 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
1020 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1154 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1021 1155
1022 const u32 offset = buffer.Offset(binding.cpu_addr); 1156 const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1051,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1051} 1185}
1052 1186
1053template <class P> 1187template <class P>
1188void BufferCache<P>::BindHostComputeTextureBuffers() {
1189 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
1190 const TextureBufferBinding& binding = compute_texture_buffers[index];
1191 Buffer& buffer = slot_buffers[binding.buffer_id];
1192 const u32 size = binding.size;
1193 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1194
1195 const u32 offset = buffer.Offset(binding.cpu_addr);
1196 const PixelFormat format = binding.format;
1197 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1198 if (((image_compute_texture_buffers >> index) & 1) != 0) {
1199 runtime.BindImageBuffer(buffer, offset, size, format);
1200 } else {
1201 runtime.BindTextureBuffer(buffer, offset, size, format);
1202 }
1203 } else {
1204 runtime.BindTextureBuffer(buffer, offset, size, format);
1205 }
1206 });
1207}
1208
1209template <class P>
1054void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { 1210void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
1055 if (is_indexed) { 1211 if (is_indexed) {
1056 UpdateIndexBuffer(); 1212 UpdateIndexBuffer();
@@ -1060,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
1060 for (size_t stage = 0; stage < NUM_STAGES; ++stage) { 1216 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
1061 UpdateUniformBuffers(stage); 1217 UpdateUniformBuffers(stage);
1062 UpdateStorageBuffers(stage); 1218 UpdateStorageBuffers(stage);
1219 UpdateTextureBuffers(stage);
1063 } 1220 }
1064} 1221}
1065 1222
@@ -1067,6 +1224,7 @@ template <class P>
1067void BufferCache<P>::DoUpdateComputeBuffers() { 1224void BufferCache<P>::DoUpdateComputeBuffers() {
1068 UpdateComputeUniformBuffers(); 1225 UpdateComputeUniformBuffers();
1069 UpdateComputeStorageBuffers(); 1226 UpdateComputeStorageBuffers();
1227 UpdateComputeTextureBuffers();
1070} 1228}
1071 1229
1072template <class P> 1230template <class P>
@@ -1136,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1136 1294
1137template <class P> 1295template <class P>
1138void BufferCache<P>::UpdateUniformBuffers(size_t stage) { 1296void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1139 ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
1297 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
1140 Binding& binding = uniform_buffers[stage][index]; 1298 Binding& binding = uniform_buffers[stage][index];
1141 if (binding.buffer_id) { 1299 if (binding.buffer_id) {
1142 // Already updated 1300 // Already updated
@@ -1167,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1167} 1325}
1168 1326
1169template <class P> 1327template <class P>
1328void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
1329 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
1330 Binding& binding = texture_buffers[stage][index];
1331 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1332 // Mark buffer as written if needed
1333 if (((written_texture_buffers[stage] >> index) & 1) != 0) {
1334 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1335 }
1336 });
1337}
1338
1339template <class P>
1170void BufferCache<P>::UpdateTransformFeedbackBuffers() { 1340void BufferCache<P>::UpdateTransformFeedbackBuffers() {
1171 if (maxwell3d.regs.tfb_enabled == 0) { 1341 if (maxwell3d.regs.tfb_enabled == 0) {
1172 return; 1342 return;
@@ -1197,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1197 1367
1198template <class P> 1368template <class P>
1199void BufferCache<P>::UpdateComputeUniformBuffers() { 1369void BufferCache<P>::UpdateComputeUniformBuffers() {
1200 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
1370 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
1201 Binding& binding = compute_uniform_buffers[index]; 1371 Binding& binding = compute_uniform_buffers[index];
1202 binding = NULL_BINDING; 1372 binding = NULL_BINDING;
1203 const auto& launch_desc = kepler_compute.launch_description; 1373 const auto& launch_desc = kepler_compute.launch_description;
@@ -1218,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1218 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 1388 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
1219 // Resolve buffer 1389 // Resolve buffer
1220 Binding& binding = compute_storage_buffers[index]; 1390 Binding& binding = compute_storage_buffers[index];
1221 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1222 binding.buffer_id = buffer_id;
1391 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1223 // Mark as written if needed 1392 // Mark as written if needed
1224 if (((written_compute_storage_buffers >> index) & 1) != 0) { 1393 if (((written_compute_storage_buffers >> index) & 1) != 0) {
1225 MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
1394 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1395 }
1396 });
1397}
1398
1399template <class P>
1400void BufferCache<P>::UpdateComputeTextureBuffers() {
1401 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
1402 Binding& binding = compute_texture_buffers[index];
1403 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1404 // Mark as written if needed
1405 if (((written_compute_texture_buffers >> index) & 1) != 0) {
1406 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1226 } 1407 }
1227 }); 1408 });
1228} 1409}
@@ -1555,6 +1736,7 @@ template <class P>
1555void BufferCache<P>::NotifyBufferDeletion() { 1736void BufferCache<P>::NotifyBufferDeletion() {
1556 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1737 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1557 dirty_uniform_buffers.fill(~u32{0}); 1738 dirty_uniform_buffers.fill(~u32{0});
1739 uniform_buffer_binding_sizes.fill({});
1558 } 1740 }
1559 auto& flags = maxwell3d.dirty.flags; 1741 auto& flags = maxwell3d.dirty.flags;
1560 flags[Dirty::IndexBuffer] = true; 1742 flags[Dirty::IndexBuffer] = true;
@@ -1582,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1582} 1764}
1583 1765
1584template <class P> 1766template <class P>
1767typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
1768 GPUVAddr gpu_addr, u32 size, PixelFormat format) {
1769 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1770 TextureBufferBinding binding;
1771 if (!cpu_addr || size == 0) {
1772 binding.cpu_addr = 0;
1773 binding.size = 0;
1774 binding.buffer_id = NULL_BUFFER_ID;
1775 binding.format = PixelFormat::Invalid;
1776 } else {
1777 binding.cpu_addr = *cpu_addr;
1778 binding.size = size;
1779 binding.buffer_id = BufferId{};
1780 binding.format = format;
1781 }
1782 return binding;
1783}
1784
1785template <class P>
1585std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { 1786std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
1586 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); 1787 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
1587 if (IsRangeGranular(cpu_addr, size) || 1788 if (IsRangeGranular(cpu_addr, size) ||
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 7149af290..b1be065c3 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
58 FillBlock(table, OFF(zeta), NUM(zeta), flag); 58 FillBlock(table, OFF(zeta), NUM(zeta), flag);
59 } 59 }
60} 60}
61
62void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
63 FillBlock(tables[0], OFF(shader_config[0]),
64 NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders);
65}
61} // Anonymous namespace 66} // Anonymous namespace
62 67
63void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { 68void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
@@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
65 SetupIndexBuffer(tables); 70 SetupIndexBuffer(tables);
66 SetupDirtyDescriptors(tables); 71 SetupDirtyDescriptors(tables);
67 SetupDirtyRenderTargets(tables); 72 SetupDirtyRenderTargets(tables);
73 SetupDirtyShaders(tables);
68} 74}
69 75
70} // namespace VideoCommon::Dirty 76} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 702688ace..504465d3f 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -36,6 +36,8 @@ enum : u8 {
36 36
37 IndexBuffer, 37 IndexBuffer,
38 38
39 Shaders,
40
39 LastCommonEntry, 41 LastCommonEntry,
40}; 42};
41 43
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
deleted file mode 100644
index f46e81bb7..000000000
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ /dev/null
@@ -1,103 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/engines/shader_type.h"
12#include "video_core/guest_driver.h"
13#include "video_core/textures/texture.h"
14
15namespace Tegra::Engines {
16
17struct SamplerDescriptor {
18 union {
19 u32 raw = 0;
20 BitField<0, 2, Tegra::Shader::TextureType> texture_type;
21 BitField<2, 3, Tegra::Texture::ComponentType> r_type;
22 BitField<5, 1, u32> is_array;
23 BitField<6, 1, u32> is_buffer;
24 BitField<7, 1, u32> is_shadow;
25 BitField<8, 3, Tegra::Texture::ComponentType> g_type;
26 BitField<11, 3, Tegra::Texture::ComponentType> b_type;
27 BitField<14, 3, Tegra::Texture::ComponentType> a_type;
28 BitField<17, 7, Tegra::Texture::TextureFormat> format;
29 };
30
31 bool operator==(const SamplerDescriptor& rhs) const noexcept {
32 return raw == rhs.raw;
33 }
34
35 bool operator!=(const SamplerDescriptor& rhs) const noexcept {
36 return !operator==(rhs);
37 }
38
39 static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
40 using Tegra::Shader::TextureType;
41 SamplerDescriptor result;
42
43 result.format.Assign(tic.format.Value());
44 result.r_type.Assign(tic.r_type.Value());
45 result.g_type.Assign(tic.g_type.Value());
46 result.b_type.Assign(tic.b_type.Value());
47 result.a_type.Assign(tic.a_type.Value());
48
49 switch (tic.texture_type.Value()) {
50 case Tegra::Texture::TextureType::Texture1D:
51 result.texture_type.Assign(TextureType::Texture1D);
52 return result;
53 case Tegra::Texture::TextureType::Texture2D:
54 result.texture_type.Assign(TextureType::Texture2D);
55 return result;
56 case Tegra::Texture::TextureType::Texture3D:
57 result.texture_type.Assign(TextureType::Texture3D);
58 return result;
59 case Tegra::Texture::TextureType::TextureCubemap:
60 result.texture_type.Assign(TextureType::TextureCube);
61 return result;
62 case Tegra::Texture::TextureType::Texture1DArray:
63 result.texture_type.Assign(TextureType::Texture1D);
64 result.is_array.Assign(1);
65 return result;
66 case Tegra::Texture::TextureType::Texture2DArray:
67 result.texture_type.Assign(TextureType::Texture2D);
68 result.is_array.Assign(1);
69 return result;
70 case Tegra::Texture::TextureType::Texture1DBuffer:
71 result.texture_type.Assign(TextureType::Texture1D);
72 result.is_buffer.Assign(1);
73 return result;
74 case Tegra::Texture::TextureType::Texture2DNoMipmap:
75 result.texture_type.Assign(TextureType::Texture2D);
76 return result;
77 case Tegra::Texture::TextureType::TextureCubeArray:
78 result.texture_type.Assign(TextureType::TextureCube);
79 result.is_array.Assign(1);
80 return result;
81 default:
82 result.texture_type.Assign(TextureType::Texture2D);
83 return result;
84 }
85 }
86};
87static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
88
89class ConstBufferEngineInterface {
90public:
91 virtual ~ConstBufferEngineInterface() = default;
92 virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
95 u64 offset) const = 0;
96 virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
97 virtual u32 GetBoundBuffer() const = 0;
98
99 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
100 virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
101};
102
103} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a9b75091e..492b4c5a3 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -8,7 +8,6 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "video_core/engines/kepler_compute.h" 9#include "video_core/engines/kepler_compute.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h"
12#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_base.h" 13#include "video_core/renderer_base.h"
@@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
57 } 56 }
58} 57}
59 58
60u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
61 ASSERT(stage == ShaderType::Compute);
62 const auto& buffer = launch_description.const_buffer_config[const_buffer];
63 u32 result;
64 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
65 return result;
66}
67
68SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
69 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
70}
71
72SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
73 u64 offset) const {
74 ASSERT(stage == ShaderType::Compute);
75 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
76 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
77 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
78}
79
80SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
81 const Texture::TextureHandle tex_handle{handle};
82 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
83 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
84
85 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
86 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
87 return result;
88}
89
90VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
91 return rasterizer->AccessGuestDriverProfile();
92}
93
94const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
95 return rasterizer->AccessGuestDriverProfile();
96}
97
98void KeplerCompute::ProcessLaunch() { 59void KeplerCompute::ProcessLaunch() {
99 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 60 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
100 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 61 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
101 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); 62 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
102
63 rasterizer->DispatchCompute();
103 const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
104 LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
105
106 rasterizer->DispatchCompute(code_addr);
107} 64}
108 65
109Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { 66Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7c40cba38..f8b8d06ac 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,10 +10,8 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h"
14#include "video_core/engines/engine_interface.h" 13#include "video_core/engines/engine_interface.h"
15#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
16#include "video_core/engines/shader_type.h"
17#include "video_core/gpu.h" 15#include "video_core/gpu.h"
18#include "video_core/textures/texture.h" 16#include "video_core/textures/texture.h"
19 17
@@ -40,7 +38,7 @@ namespace Tegra::Engines {
40#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 38#define KEPLER_COMPUTE_REG_INDEX(field_name) \
41 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 39 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
42 40
43class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
41class KeplerCompute final : public EngineInterface {
44public: 42public:
45 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); 43 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
46 ~KeplerCompute(); 44 ~KeplerCompute();
@@ -209,23 +207,6 @@ public:
209 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 207 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
210 u32 methods_pending) override; 208 u32 methods_pending) override;
211 209
212 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
213
214 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
215
216 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
217 u64 offset) const override;
218
219 SamplerDescriptor AccessSampler(u32 handle) const override;
220
221 u32 GetBoundBuffer() const override {
222 return regs.tex_cb_index;
223 }
224
225 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
226
227 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
228
229private: 210private:
230 void ProcessLaunch(); 211 void ProcessLaunch();
231 212
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index aab6b8f7a..b18b8a02a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -8,7 +8,6 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
@@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() {
670 rasterizer->Clear(); 669 rasterizer->Clear();
671} 670}
672 671
673u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
674 ASSERT(stage != ShaderType::Compute);
675 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
676 const auto& buffer = shader_stage.const_buffers[const_buffer];
677 return memory_manager.Read<u32>(buffer.address + offset);
678}
679
680SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
681 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
682}
683
684SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
685 u64 offset) const {
686 ASSERT(stage != ShaderType::Compute);
687 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
688 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
689 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
690 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
691}
692
693SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
694 const Texture::TextureHandle tex_handle{handle};
695 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
696 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
697
698 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
699 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
700 return result;
701}
702
703VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
704 return rasterizer->AccessGuestDriverProfile();
705}
706
707const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
708 return rasterizer->AccessGuestDriverProfile();
709}
710
711} // namespace Tegra::Engines 672} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 335383955..1aa43523a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -17,11 +17,9 @@
17#include "common/common_funcs.h" 17#include "common/common_funcs.h"
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "common/math_util.h" 19#include "common/math_util.h"
20#include "video_core/engines/const_buffer_engine_interface.h"
21#include "video_core/engines/const_buffer_info.h" 20#include "video_core/engines/const_buffer_info.h"
22#include "video_core/engines/engine_interface.h" 21#include "video_core/engines/engine_interface.h"
23#include "video_core/engines/engine_upload.h" 22#include "video_core/engines/engine_upload.h"
24#include "video_core/engines/shader_type.h"
25#include "video_core/gpu.h" 23#include "video_core/gpu.h"
26#include "video_core/macro/macro.h" 24#include "video_core/macro/macro.h"
27#include "video_core/textures/texture.h" 25#include "video_core/textures/texture.h"
@@ -49,7 +47,7 @@ namespace Tegra::Engines {
49#define MAXWELL3D_REG_INDEX(field_name) \ 47#define MAXWELL3D_REG_INDEX(field_name) \
50 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 48 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
51 49
52class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
50class Maxwell3D final : public EngineInterface {
53public: 51public:
54 explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); 52 explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
55 ~Maxwell3D(); 53 ~Maxwell3D();
@@ -307,10 +305,6 @@ public:
307 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); 305 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
308 } 306 }
309 307
310 bool IsConstant() const {
311 return constant;
312 }
313
314 bool IsValid() const { 308 bool IsValid() const {
315 return size != Size::Invalid; 309 return size != Size::Invalid;
316 } 310 }
@@ -912,7 +906,11 @@ public:
912 906
913 u32 fill_rectangle; 907 u32 fill_rectangle;
914 908
915 INSERT_PADDING_WORDS_NOINIT(0x8);
909 INSERT_PADDING_WORDS_NOINIT(0x2);
910
911 u32 conservative_raster_enable;
912
913 INSERT_PADDING_WORDS_NOINIT(0x5);
916 914
917 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 915 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
918 916
@@ -959,7 +957,11 @@ public:
959 957
960 SamplerIndex sampler_index; 958 SamplerIndex sampler_index;
961 959
962 INSERT_PADDING_WORDS_NOINIT(0x25);
960 INSERT_PADDING_WORDS_NOINIT(0x2);
961
962 std::array<u32, 8> gp_passthrough_mask;
963
964 INSERT_PADDING_WORDS_NOINIT(0x1B);
963 965
964 u32 depth_test_enable; 966 u32 depth_test_enable;
965 967
@@ -1152,7 +1154,11 @@ public:
1152 u32 index; 1154 u32 index;
1153 } primitive_restart; 1155 } primitive_restart;
1154 1156
1155 INSERT_PADDING_WORDS_NOINIT(0x5F);
1157 INSERT_PADDING_WORDS_NOINIT(0xE);
1158
1159 u32 provoking_vertex_last;
1160
1161 INSERT_PADDING_WORDS_NOINIT(0x50);
1156 1162
1157 struct { 1163 struct {
1158 u32 start_addr_high; 1164 u32 start_addr_high;
@@ -1424,23 +1430,6 @@ public:
1424 1430
1425 void FlushMMEInlineDraw(); 1431 void FlushMMEInlineDraw();
1426 1432
1427 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1428
1429 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
1430
1431 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1432 u64 offset) const override;
1433
1434 SamplerDescriptor AccessSampler(u32 handle) const override;
1435
1436 u32 GetBoundBuffer() const override {
1437 return regs.tex_cb_index;
1438 }
1439
1440 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
1441
1442 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
1443
1444 bool ShouldExecute() const { 1433 bool ShouldExecute() const {
1445 return execute_on; 1434 return execute_on;
1446 } 1435 }
@@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8);
1630ASSERT_REG_POSITION(render_area, 0x3FD); 1619ASSERT_REG_POSITION(render_area, 0x3FD);
1631ASSERT_REG_POSITION(clear_flags, 0x43E); 1620ASSERT_REG_POSITION(clear_flags, 0x43E);
1632ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1621ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1622ASSERT_REG_POSITION(conservative_raster_enable, 0x452);
1633ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1623ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1634ASSERT_REG_POSITION(multisample_sample_locations, 0x478); 1624ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
1635ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); 1625ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
@@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a);
1638ASSERT_REG_POSITION(zeta_height, 0x48b); 1628ASSERT_REG_POSITION(zeta_height, 0x48b);
1639ASSERT_REG_POSITION(zeta_depth, 0x48c); 1629ASSERT_REG_POSITION(zeta_depth, 0x48c);
1640ASSERT_REG_POSITION(sampler_index, 0x48D); 1630ASSERT_REG_POSITION(sampler_index, 0x48D);
1631ASSERT_REG_POSITION(gp_passthrough_mask, 0x490);
1641ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 1632ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
1642ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 1633ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
1643ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 1634ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581);
1690ASSERT_REG_POSITION(code_address, 0x582); 1681ASSERT_REG_POSITION(code_address, 0x582);
1691ASSERT_REG_POSITION(draw, 0x585); 1682ASSERT_REG_POSITION(draw, 0x585);
1692ASSERT_REG_POSITION(primitive_restart, 0x591); 1683ASSERT_REG_POSITION(primitive_restart, 0x591);
1684ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
1693ASSERT_REG_POSITION(index_array, 0x5F2); 1685ASSERT_REG_POSITION(index_array, 0x5F2);
1694ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); 1686ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1695ASSERT_REG_POSITION(instanced_arrays, 0x620); 1687ASSERT_REG_POSITION(instanced_arrays, 0x620);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c51776466..c7ec1eac9 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
127 127
128 // Optimized path for micro copies. 128 // Optimized path for micro copies.
129 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 129 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
130 if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
130 if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
131 regs.src_params.height > GOB_SIZE_Y) {
131 FastCopyBlockLinearToPitch(); 132 FastCopyBlockLinearToPitch();
132 return; 133 return;
133 } 134 }
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
deleted file mode 100644
index 8b45f1b62..000000000
--- a/src/video_core/engines/shader_bytecode.h
+++ /dev/null
@@ -1,2298 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <optional>
10#include <tuple>
11#include <vector>
12
13#include "common/assert.h"
14#include "common/bit_field.h"
15#include "common/common_types.h"
16
17namespace Tegra::Shader {
18
19struct Register {
20 /// Number of registers
21 static constexpr std::size_t NumRegisters = 256;
22
23 /// Register 255 is special cased to always be 0
24 static constexpr std::size_t ZeroIndex = 255;
25
26 enum class Size : u64 {
27 Byte = 0,
28 Short = 1,
29 Word = 2,
30 Long = 3,
31 };
32
33 constexpr Register() = default;
34
35 constexpr Register(u64 value_) : value(value_) {}
36
37 [[nodiscard]] constexpr operator u64() const {
38 return value;
39 }
40
41 template <typename T>
42 [[nodiscard]] constexpr u64 operator-(const T& oth) const {
43 return value - oth;
44 }
45
46 template <typename T>
47 [[nodiscard]] constexpr u64 operator&(const T& oth) const {
48 return value & oth;
49 }
50
51 [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
52 return value & oth.value;
53 }
54
55 [[nodiscard]] constexpr u64 operator~() const {
56 return ~value;
57 }
58
59 [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
60 elem = (value + elem) & 3;
61 return (value & ~3) + elem;
62 }
63
64private:
65 u64 value{};
66};
67
68enum class AttributeSize : u64 {
69 Word = 0,
70 DoubleWord = 1,
71 TripleWord = 2,
72 QuadWord = 3,
73};
74
75union Attribute {
76 Attribute() = default;
77
78 constexpr explicit Attribute(u64 value_) : value(value_) {}
79
80 enum class Index : u64 {
81 LayerViewportPointSize = 6,
82 Position = 7,
83 Attribute_0 = 8,
84 Attribute_31 = 39,
85 FrontColor = 40,
86 FrontSecondaryColor = 41,
87 BackColor = 42,
88 BackSecondaryColor = 43,
89 ClipDistances0123 = 44,
90 ClipDistances4567 = 45,
91 PointCoord = 46,
92 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
93 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
94 // shader.
95 TessCoordInstanceIDVertexID = 47,
96 TexCoord_0 = 48,
97 TexCoord_7 = 55,
98 // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
99 // shader. It is unknown what the other values contain.
100 FrontFacing = 63,
101 };
102
103 union {
104 BitField<20, 10, u64> immediate;
105 BitField<22, 2, u64> element;
106 BitField<24, 6, Index> index;
107 BitField<31, 1, u64> patch;
108 BitField<47, 3, AttributeSize> size;
109
110 [[nodiscard]] bool IsPhysical() const {
111 return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
112 }
113 } fmt20;
114
115 union {
116 BitField<30, 2, u64> element;
117 BitField<32, 6, Index> index;
118 } fmt28;
119
120 BitField<39, 8, u64> reg;
121 u64 value{};
122};
123
124union Sampler {
125 Sampler() = default;
126
127 constexpr explicit Sampler(u64 value_) : value(value_) {}
128
129 enum class Index : u64 {
130 Sampler_0 = 8,
131 };
132
133 BitField<36, 13, Index> index;
134 u64 value{};
135};
136
137union Image {
138 Image() = default;
139
140 constexpr explicit Image(u64 value_) : value{value_} {}
141
142 BitField<36, 13, u64> index;
143 u64 value;
144};
145
146} // namespace Tegra::Shader
147
148namespace std {
149
150// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
151template <>
152struct make_unsigned<Tegra::Shader::Attribute> {
153 using type = Tegra::Shader::Attribute;
154};
155
156template <>
157struct make_unsigned<Tegra::Shader::Register> {
158 using type = Tegra::Shader::Register;
159};
160
161} // namespace std
162
163namespace Tegra::Shader {
164
165enum class Pred : u64 {
166 UnusedIndex = 0x7,
167 NeverExecute = 0xF,
168};
169
170enum class PredCondition : u64 {
171 F = 0, // Always false
172 LT = 1, // Ordered less than
173 EQ = 2, // Ordered equal
174 LE = 3, // Ordered less than or equal
175 GT = 4, // Ordered greater than
176 NE = 5, // Ordered not equal
177 GE = 6, // Ordered greater than or equal
178 NUM = 7, // Ordered
179 NAN_ = 8, // Unordered
180 LTU = 9, // Unordered less than
181 EQU = 10, // Unordered equal
182 LEU = 11, // Unordered less than or equal
183 GTU = 12, // Unordered greater than
184 NEU = 13, // Unordered not equal
185 GEU = 14, // Unordered greater than or equal
186 T = 15, // Always true
187};
188
189enum class PredOperation : u64 {
190 And = 0,
191 Or = 1,
192 Xor = 2,
193};
194
195enum class LogicOperation : u64 {
196 And = 0,
197 Or = 1,
198 Xor = 2,
199 PassB = 3,
200};
201
202enum class SubOp : u64 {
203 Cos = 0x0,
204 Sin = 0x1,
205 Ex2 = 0x2,
206 Lg2 = 0x3,
207 Rcp = 0x4,
208 Rsq = 0x5,
209 Sqrt = 0x8,
210};
211
212enum class F2iRoundingOp : u64 {
213 RoundEven = 0,
214 Floor = 1,
215 Ceil = 2,
216 Trunc = 3,
217};
218
219enum class F2fRoundingOp : u64 {
220 None = 0,
221 Pass = 3,
222 Round = 8,
223 Floor = 9,
224 Ceil = 10,
225 Trunc = 11,
226};
227
228enum class AtomicOp : u64 {
229 Add = 0,
230 Min = 1,
231 Max = 2,
232 Inc = 3,
233 Dec = 4,
234 And = 5,
235 Or = 6,
236 Xor = 7,
237 Exch = 8,
238 SafeAdd = 10,
239};
240
241enum class GlobalAtomicType : u64 {
242 U32 = 0,
243 S32 = 1,
244 U64 = 2,
245 F32_FTZ_RN = 3,
246 F16x2_FTZ_RN = 4,
247 S64 = 5,
248};
249
250enum class UniformType : u64 {
251 UnsignedByte = 0,
252 SignedByte = 1,
253 UnsignedShort = 2,
254 SignedShort = 3,
255 Single = 4,
256 Double = 5,
257 Quad = 6,
258 UnsignedQuad = 7,
259};
260
261enum class StoreType : u64 {
262 Unsigned8 = 0,
263 Signed8 = 1,
264 Unsigned16 = 2,
265 Signed16 = 3,
266 Bits32 = 4,
267 Bits64 = 5,
268 Bits128 = 6,
269};
270
271enum class AtomicType : u64 {
272 U32 = 0,
273 S32 = 1,
274 U64 = 2,
275 S64 = 3,
276};
277
278enum class IMinMaxExchange : u64 {
279 None = 0,
280 XLo = 1,
281 XMed = 2,
282 XHi = 3,
283};
284
285enum class VideoType : u64 {
286 Size16_Low = 0,
287 Size16_High = 1,
288 Size32 = 2,
289 Invalid = 3,
290};
291
292enum class VmadShr : u64 {
293 Shr7 = 1,
294 Shr15 = 2,
295};
296
297enum class VmnmxType : u64 {
298 Bits8,
299 Bits16,
300 Bits32,
301};
302
303enum class VmnmxOperation : u64 {
304 Mrg_16H = 0,
305 Mrg_16L = 1,
306 Mrg_8B0 = 2,
307 Mrg_8B2 = 3,
308 Acc = 4,
309 Min = 5,
310 Max = 6,
311 Nop = 7,
312};
313
314enum class XmadMode : u64 {
315 None = 0,
316 CLo = 1,
317 CHi = 2,
318 CSfu = 3,
319 CBcc = 4,
320};
321
322enum class IAdd3Mode : u64 {
323 None = 0,
324 RightShift = 1,
325 LeftShift = 2,
326};
327
328enum class IAdd3Height : u64 {
329 None = 0,
330 LowerHalfWord = 1,
331 UpperHalfWord = 2,
332};
333
334enum class FlowCondition : u64 {
335 Always = 0xF,
336 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
337};
338
339enum class ConditionCode : u64 {
340 F = 0,
341 LT = 1,
342 EQ = 2,
343 LE = 3,
344 GT = 4,
345 NE = 5,
346 GE = 6,
347 Num = 7,
348 Nan = 8,
349 LTU = 9,
350 EQU = 10,
351 LEU = 11,
352 GTU = 12,
353 NEU = 13,
354 GEU = 14,
355 T = 15,
356 OFF = 16,
357 LO = 17,
358 SFF = 18,
359 LS = 19,
360 HI = 20,
361 SFT = 21,
362 HS = 22,
363 OFT = 23,
364 CSM_TA = 24,
365 CSM_TR = 25,
366 CSM_MX = 26,
367 FCSM_TA = 27,
368 FCSM_TR = 28,
369 FCSM_MX = 29,
370 RLE = 30,
371 RGT = 31,
372};
373
374enum class PredicateResultMode : u64 {
375 None = 0x0,
376 NotZero = 0x3,
377};
378
379enum class TextureType : u64 {
380 Texture1D = 0,
381 Texture2D = 1,
382 Texture3D = 2,
383 TextureCube = 3,
384};
385
386enum class TextureQueryType : u64 {
387 Dimension = 1,
388 TextureType = 2,
389 SamplePosition = 5,
390 Filter = 16,
391 LevelOfDetail = 18,
392 Wrap = 20,
393 BorderColor = 22,
394};
395
396enum class TextureProcessMode : u64 {
397 None = 0,
398 LZ = 1, // Load LOD of zero.
399 LB = 2, // Load Bias.
400 LL = 3, // Load LOD.
401 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
402 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
403};
404
405enum class TextureMiscMode : u64 {
406 DC,
407 AOFFI, // Uses Offset
408 NDV,
409 NODEP,
410 MZ,
411 PTP,
412};
413
414enum class SurfaceDataMode : u64 {
415 P = 0,
416 D_BA = 1,
417};
418
419enum class OutOfBoundsStore : u64 {
420 Ignore = 0,
421 Clamp = 1,
422 Trap = 2,
423};
424
425enum class ImageType : u64 {
426 Texture1D = 0,
427 TextureBuffer = 1,
428 Texture1DArray = 2,
429 Texture2D = 3,
430 Texture2DArray = 4,
431 Texture3D = 5,
432};
433
434enum class IsberdMode : u64 {
435 None = 0,
436 Patch = 1,
437 Prim = 2,
438 Attr = 3,
439};
440
441enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
442
443enum class MembarType : u64 {
444 CTA = 0,
445 GL = 1,
446 SYS = 2,
447 VC = 3,
448};
449
450enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
451
452enum class HalfType : u64 {
453 H0_H1 = 0,
454 F32 = 1,
455 H0_H0 = 2,
456 H1_H1 = 3,
457};
458
459enum class HalfMerge : u64 {
460 H0_H1 = 0,
461 F32 = 1,
462 Mrg_H0 = 2,
463 Mrg_H1 = 3,
464};
465
466enum class HalfPrecision : u64 {
467 None = 0,
468 FTZ = 1,
469 FMZ = 2,
470};
471
472enum class R2pMode : u64 {
473 Pr = 0,
474 Cc = 1,
475};
476
477enum class IpaInterpMode : u64 {
478 Pass = 0,
479 Multiply = 1,
480 Constant = 2,
481 Sc = 3,
482};
483
484enum class IpaSampleMode : u64 {
485 Default = 0,
486 Centroid = 1,
487 Offset = 2,
488};
489
490enum class LmemLoadCacheManagement : u64 {
491 Default = 0,
492 LU = 1,
493 CI = 2,
494 CV = 3,
495};
496
497enum class StoreCacheManagement : u64 {
498 Default = 0,
499 CG = 1,
500 CS = 2,
501 WT = 3,
502};
503
504struct IpaMode {
505 IpaInterpMode interpolation_mode;
506 IpaSampleMode sampling_mode;
507
508 [[nodiscard]] bool operator==(const IpaMode& a) const {
509 return std::tie(interpolation_mode, sampling_mode) ==
510 std::tie(a.interpolation_mode, a.sampling_mode);
511 }
512 [[nodiscard]] bool operator!=(const IpaMode& a) const {
513 return !operator==(a);
514 }
515 [[nodiscard]] bool operator<(const IpaMode& a) const {
516 return std::tie(interpolation_mode, sampling_mode) <
517 std::tie(a.interpolation_mode, a.sampling_mode);
518 }
519};
520
521enum class SystemVariable : u64 {
522 LaneId = 0x00,
523 VirtCfg = 0x02,
524 VirtId = 0x03,
525 Pm0 = 0x04,
526 Pm1 = 0x05,
527 Pm2 = 0x06,
528 Pm3 = 0x07,
529 Pm4 = 0x08,
530 Pm5 = 0x09,
531 Pm6 = 0x0a,
532 Pm7 = 0x0b,
533 OrderingTicket = 0x0f,
534 PrimType = 0x10,
535 InvocationId = 0x11,
536 Ydirection = 0x12,
537 ThreadKill = 0x13,
538 ShaderType = 0x14,
539 DirectBeWriteAddressLow = 0x15,
540 DirectBeWriteAddressHigh = 0x16,
541 DirectBeWriteEnabled = 0x17,
542 MachineId0 = 0x18,
543 MachineId1 = 0x19,
544 MachineId2 = 0x1a,
545 MachineId3 = 0x1b,
546 Affinity = 0x1c,
547 InvocationInfo = 0x1d,
548 WscaleFactorXY = 0x1e,
549 WscaleFactorZ = 0x1f,
550 Tid = 0x20,
551 TidX = 0x21,
552 TidY = 0x22,
553 TidZ = 0x23,
554 CtaParam = 0x24,
555 CtaIdX = 0x25,
556 CtaIdY = 0x26,
557 CtaIdZ = 0x27,
558 NtId = 0x28,
559 CirQueueIncrMinusOne = 0x29,
560 Nlatc = 0x2a,
561 SmSpaVersion = 0x2c,
562 MultiPassShaderInfo = 0x2d,
563 LwinHi = 0x2e,
564 SwinHi = 0x2f,
565 SwinLo = 0x30,
566 SwinSz = 0x31,
567 SmemSz = 0x32,
568 SmemBanks = 0x33,
569 LwinLo = 0x34,
570 LwinSz = 0x35,
571 LmemLosz = 0x36,
572 LmemHioff = 0x37,
573 EqMask = 0x38,
574 LtMask = 0x39,
575 LeMask = 0x3a,
576 GtMask = 0x3b,
577 GeMask = 0x3c,
578 RegAlloc = 0x3d,
579 CtxAddr = 0x3e, // .fmask = F_SM50
580 BarrierAlloc = 0x3e, // .fmask = F_SM60
581 GlobalErrorStatus = 0x40,
582 WarpErrorStatus = 0x42,
583 WarpErrorStatusClear = 0x43,
584 PmHi0 = 0x48,
585 PmHi1 = 0x49,
586 PmHi2 = 0x4a,
587 PmHi3 = 0x4b,
588 PmHi4 = 0x4c,
589 PmHi5 = 0x4d,
590 PmHi6 = 0x4e,
591 PmHi7 = 0x4f,
592 ClockLo = 0x50,
593 ClockHi = 0x51,
594 GlobalTimerLo = 0x52,
595 GlobalTimerHi = 0x53,
596 HwTaskId = 0x60,
597 CircularQueueEntryIndex = 0x61,
598 CircularQueueEntryAddressLow = 0x62,
599 CircularQueueEntryAddressHigh = 0x63,
600};
601
602enum class PhysicalAttributeDirection : u64 {
603 Input = 0,
604 Output = 1,
605};
606
607enum class VoteOperation : u64 {
608 All = 0, // allThreadsNV
609 Any = 1, // anyThreadNV
610 Eq = 2, // allThreadsEqualNV
611};
612
613enum class ImageAtomicOperationType : u64 {
614 U32 = 0,
615 S32 = 1,
616 U64 = 2,
617 F32 = 3,
618 S64 = 5,
619 SD32 = 6,
620 SD64 = 7,
621};
622
623enum class ImageAtomicOperation : u64 {
624 Add = 0,
625 Min = 1,
626 Max = 2,
627 Inc = 3,
628 Dec = 4,
629 And = 5,
630 Or = 6,
631 Xor = 7,
632 Exch = 8,
633};
634
635enum class ShuffleOperation : u64 {
636 Idx = 0, // shuffleNV
637 Up = 1, // shuffleUpNV
638 Down = 2, // shuffleDownNV
639 Bfly = 3, // shuffleXorNV
640};
641
642enum class ShfType : u64 {
643 Bits32 = 0,
644 U64 = 2,
645 S64 = 3,
646};
647
648enum class ShfXmode : u64 {
649 None = 0,
650 HI = 1,
651 X = 2,
652 XHI = 3,
653};
654
655union Instruction {
656 constexpr Instruction& operator=(const Instruction& instr) {
657 value = instr.value;
658 return *this;
659 }
660
661 constexpr Instruction(u64 value_) : value{value_} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663
664 [[nodiscard]] constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
668 BitField<0, 8, Register> gpr0;
669 BitField<8, 8, Register> gpr8;
670 union {
671 BitField<16, 4, Pred> full_pred;
672 BitField<16, 3, u64> pred_index;
673 } pred;
674 BitField<19, 1, u64> negate_pred;
675 BitField<20, 8, Register> gpr20;
676 BitField<20, 4, SubOp> sub_op;
677 BitField<28, 8, Register> gpr28;
678 BitField<39, 8, Register> gpr39;
679 BitField<48, 16, u64> opcode;
680
681 union {
682 BitField<8, 5, ConditionCode> cc;
683 BitField<13, 1, u64> trigger;
684 } nop;
685
686 union {
687 BitField<48, 2, VoteOperation> operation;
688 BitField<45, 3, u64> dest_pred;
689 BitField<39, 3, u64> value;
690 BitField<42, 1, u64> negate_value;
691 } vote;
692
693 union {
694 BitField<30, 2, ShuffleOperation> operation;
695 BitField<48, 3, u64> pred48;
696 BitField<28, 1, u64> is_index_imm;
697 BitField<29, 1, u64> is_mask_imm;
698 BitField<20, 5, u64> index_imm;
699 BitField<34, 13, u64> mask_imm;
700 } shfl;
701
702 union {
703 BitField<44, 1, u64> ftz;
704 BitField<39, 2, u64> tab5cb8_2;
705 BitField<38, 1, u64> ndv;
706 BitField<47, 1, u64> cc;
707 BitField<28, 8, u64> swizzle;
708 } fswzadd;
709
710 union {
711 BitField<8, 8, Register> gpr;
712 BitField<20, 24, s64> offset;
713 } gmem;
714
715 union {
716 BitField<20, 16, u64> imm20_16;
717 BitField<20, 19, u64> imm20_19;
718 BitField<20, 32, s64> imm20_32;
719 BitField<45, 1, u64> negate_b;
720 BitField<46, 1, u64> abs_a;
721 BitField<48, 1, u64> negate_a;
722 BitField<49, 1, u64> abs_b;
723 BitField<50, 1, u64> saturate_d;
724 BitField<56, 1, u64> negate_imm;
725
726 union {
727 BitField<39, 3, u64> pred;
728 BitField<42, 1, u64> negate_pred;
729 } fmnmx;
730
731 union {
732 BitField<39, 1, u64> invert_a;
733 BitField<40, 1, u64> invert_b;
734 BitField<41, 2, LogicOperation> operation;
735 BitField<44, 2, PredicateResultMode> pred_result_mode;
736 BitField<48, 3, Pred> pred48;
737 } lop;
738
739 union {
740 BitField<53, 2, LogicOperation> operation;
741 BitField<55, 1, u64> invert_a;
742 BitField<56, 1, u64> invert_b;
743 } lop32i;
744
745 union {
746 BitField<28, 8, u64> imm_lut28;
747 BitField<48, 8, u64> imm_lut48;
748
749 [[nodiscard]] u32 GetImmLut28() const {
750 return static_cast<u32>(imm_lut28);
751 }
752
753 [[nodiscard]] u32 GetImmLut48() const {
754 return static_cast<u32>(imm_lut48);
755 }
756 } lop3;
757
758 [[nodiscard]] u16 GetImm20_16() const {
759 return static_cast<u16>(imm20_16);
760 }
761
762 [[nodiscard]] u32 GetImm20_19() const {
763 u32 imm{static_cast<u32>(imm20_19)};
764 imm <<= 12;
765 imm |= negate_imm ? 0x80000000 : 0;
766 return imm;
767 }
768
769 [[nodiscard]] u32 GetImm20_32() const {
770 return static_cast<u32>(imm20_32);
771 }
772
773 [[nodiscard]] s32 GetSignedImm20_20() const {
774 const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
775 // Sign extend the 20-bit value.
776 const auto mask = 1U << (20 - 1);
777 return static_cast<s32>((immediate ^ mask) - mask);
778 }
779 } alu;
780
781 union {
782 BitField<38, 1, u64> idx;
783 BitField<51, 1, u64> saturate;
784 BitField<52, 2, IpaSampleMode> sample_mode;
785 BitField<54, 2, IpaInterpMode> interp_mode;
786 } ipa;
787
788 union {
789 BitField<39, 2, u64> tab5cb8_2;
790 BitField<41, 3, u64> postfactor;
791 BitField<44, 2, u64> tab5c68_0;
792 BitField<48, 1, u64> negate_b;
793 } fmul;
794
795 union {
796 BitField<55, 1, u64> saturate;
797 } fmul32;
798
799 union {
800 BitField<52, 1, u64> generates_cc;
801 } op_32;
802
803 union {
804 BitField<48, 1, u64> is_signed;
805 } shift;
806
807 union {
808 BitField<39, 1, u64> wrap;
809 } shr;
810
811 union {
812 BitField<37, 2, ShfType> type;
813 BitField<48, 2, ShfXmode> xmode;
814 BitField<50, 1, u64> wrap;
815 BitField<20, 6, u64> immediate;
816 } shf;
817
818 union {
819 BitField<39, 5, u64> shift_amount;
820 BitField<48, 1, u64> negate_b;
821 BitField<49, 1, u64> negate_a;
822 } alu_integer;
823
824 union {
825 BitField<43, 1, u64> x;
826 } iadd;
827
828 union {
829 BitField<39, 1, u64> ftz;
830 BitField<32, 1, u64> saturate;
831 BitField<49, 2, HalfMerge> merge;
832
833 BitField<44, 1, u64> abs_a;
834 BitField<47, 2, HalfType> type_a;
835
836 BitField<30, 1, u64> abs_b;
837 BitField<28, 2, HalfType> type_b;
838
839 BitField<35, 2, HalfType> type_c;
840 } alu_half;
841
842 union {
843 BitField<39, 2, HalfPrecision> precision;
844 BitField<39, 1, u64> ftz;
845 BitField<52, 1, u64> saturate;
846 BitField<49, 2, HalfMerge> merge;
847
848 BitField<43, 1, u64> negate_a;
849 BitField<44, 1, u64> abs_a;
850 BitField<47, 2, HalfType> type_a;
851 } alu_half_imm;
852
853 union {
854 BitField<29, 1, u64> first_negate;
855 BitField<20, 9, u64> first;
856
857 BitField<56, 1, u64> second_negate;
858 BitField<30, 9, u64> second;
859
860 [[nodiscard]] u32 PackImmediates() const {
861 // Immediates are half floats shifted.
862 constexpr u32 imm_shift = 6;
863 return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
864 }
865 } half_imm;
866
867 union {
868 union {
869 BitField<37, 2, HalfPrecision> precision;
870 BitField<32, 1, u64> saturate;
871
872 BitField<31, 1, u64> negate_b;
873 BitField<30, 1, u64> negate_c;
874 BitField<35, 2, HalfType> type_c;
875 } rr;
876
877 BitField<57, 2, HalfPrecision> precision;
878 BitField<52, 1, u64> saturate;
879
880 BitField<49, 2, HalfMerge> merge;
881
882 BitField<47, 2, HalfType> type_a;
883
884 BitField<56, 1, u64> negate_b;
885 BitField<28, 2, HalfType> type_b;
886
887 BitField<51, 1, u64> negate_c;
888 BitField<53, 2, HalfType> type_reg39;
889 } hfma2;
890
891 union {
892 BitField<40, 1, u64> invert;
893 } popc;
894
895 union {
896 BitField<41, 1, u64> sh;
897 BitField<40, 1, u64> invert;
898 BitField<48, 1, u64> is_signed;
899 } flo;
900
901 union {
902 BitField<39, 3, u64> pred;
903 BitField<42, 1, u64> neg_pred;
904 } sel;
905
906 union {
907 BitField<39, 3, u64> pred;
908 BitField<42, 1, u64> negate_pred;
909 BitField<43, 2, IMinMaxExchange> exchange;
910 BitField<48, 1, u64> is_signed;
911 } imnmx;
912
913 union {
914 BitField<31, 2, IAdd3Height> height_c;
915 BitField<33, 2, IAdd3Height> height_b;
916 BitField<35, 2, IAdd3Height> height_a;
917 BitField<37, 2, IAdd3Mode> mode;
918 BitField<49, 1, u64> neg_c;
919 BitField<50, 1, u64> neg_b;
920 BitField<51, 1, u64> neg_a;
921 } iadd3;
922
923 union {
924 BitField<54, 1, u64> saturate;
925 BitField<56, 1, u64> negate_a;
926 } iadd32i;
927
928 union {
929 BitField<53, 1, u64> negate_b;
930 BitField<54, 1, u64> abs_a;
931 BitField<56, 1, u64> negate_a;
932 BitField<57, 1, u64> abs_b;
933 } fadd32i;
934
935 union {
936 BitField<40, 1, u64> brev;
937 BitField<47, 1, u64> rd_cc;
938 BitField<48, 1, u64> is_signed;
939 } bfe;
940
941 union {
942 BitField<48, 3, u64> pred48;
943
944 union {
945 BitField<20, 20, u64> entry_a;
946 BitField<39, 5, u64> entry_b;
947 BitField<45, 1, u64> neg;
948 BitField<46, 1, u64> uses_cc;
949 } imm;
950
951 union {
952 BitField<20, 14, u64> cb_index;
953 BitField<34, 5, u64> cb_offset;
954 BitField<56, 1, u64> neg;
955 BitField<57, 1, u64> uses_cc;
956 } hi;
957
958 union {
959 BitField<20, 14, u64> cb_index;
960 BitField<34, 5, u64> cb_offset;
961 BitField<39, 5, u64> entry_a;
962 BitField<45, 1, u64> neg;
963 BitField<46, 1, u64> uses_cc;
964 } rz;
965
966 union {
967 BitField<39, 5, u64> entry_a;
968 BitField<45, 1, u64> neg;
969 BitField<46, 1, u64> uses_cc;
970 } r1;
971
972 union {
973 BitField<28, 8, u64> entry_a;
974 BitField<37, 1, u64> neg;
975 BitField<38, 1, u64> uses_cc;
976 } r2;
977
978 } lea;
979
980 union {
981 BitField<0, 5, FlowCondition> cond;
982 } flow;
983
984 union {
985 BitField<47, 1, u64> cc;
986 BitField<48, 1, u64> negate_b;
987 BitField<49, 1, u64> negate_c;
988 BitField<51, 2, u64> tab5980_1;
989 BitField<53, 2, u64> tab5980_0;
990 } ffma;
991
992 union {
993 BitField<48, 3, UniformType> type;
994 BitField<44, 2, u64> unknown;
995 } ld_c;
996
997 union {
998 BitField<48, 3, StoreType> type;
999 } ldst_sl;
1000
1001 union {
1002 BitField<44, 2, u64> unknown;
1003 } ld_l;
1004
1005 union {
1006 BitField<44, 2, StoreCacheManagement> cache_management;
1007 } st_l;
1008
1009 union {
1010 BitField<48, 3, UniformType> type;
1011 BitField<46, 2, u64> cache_mode;
1012 } ldg;
1013
1014 union {
1015 BitField<48, 3, UniformType> type;
1016 BitField<46, 2, u64> cache_mode;
1017 } stg;
1018
1019 union {
1020 BitField<23, 3, AtomicOp> operation;
1021 BitField<48, 1, u64> extended;
1022 BitField<20, 3, GlobalAtomicType> type;
1023 } red;
1024
1025 union {
1026 BitField<52, 4, AtomicOp> operation;
1027 BitField<49, 3, GlobalAtomicType> type;
1028 BitField<28, 20, s64> offset;
1029 } atom;
1030
1031 union {
1032 BitField<52, 4, AtomicOp> operation;
1033 BitField<28, 2, AtomicType> type;
1034 BitField<30, 22, s64> offset;
1035
1036 [[nodiscard]] s32 GetImmediateOffset() const {
1037 return static_cast<s32>(offset << 2);
1038 }
1039 } atoms;
1040
1041 union {
1042 BitField<32, 1, PhysicalAttributeDirection> direction;
1043 BitField<47, 3, AttributeSize> size;
1044 BitField<20, 11, u64> address;
1045 } al2p;
1046
1047 union {
1048 BitField<53, 3, UniformType> type;
1049 BitField<52, 1, u64> extended;
1050 } generic;
1051
1052 union {
1053 BitField<0, 3, u64> pred0;
1054 BitField<3, 3, u64> pred3;
1055 BitField<6, 1, u64> neg_b;
1056 BitField<7, 1, u64> abs_a;
1057 BitField<39, 3, u64> pred39;
1058 BitField<42, 1, u64> neg_pred;
1059 BitField<43, 1, u64> neg_a;
1060 BitField<44, 1, u64> abs_b;
1061 BitField<45, 2, PredOperation> op;
1062 BitField<47, 1, u64> ftz;
1063 BitField<48, 4, PredCondition> cond;
1064 } fsetp;
1065
1066 union {
1067 BitField<0, 3, u64> pred0;
1068 BitField<3, 3, u64> pred3;
1069 BitField<39, 3, u64> pred39;
1070 BitField<42, 1, u64> neg_pred;
1071 BitField<45, 2, PredOperation> op;
1072 BitField<48, 1, u64> is_signed;
1073 BitField<49, 3, PredCondition> cond;
1074 } isetp;
1075
1076 union {
1077 BitField<48, 1, u64> is_signed;
1078 BitField<49, 3, PredCondition> cond;
1079 } icmp;
1080
1081 union {
1082 BitField<0, 3, u64> pred0;
1083 BitField<3, 3, u64> pred3;
1084 BitField<12, 3, u64> pred12;
1085 BitField<15, 1, u64> neg_pred12;
1086 BitField<24, 2, PredOperation> cond;
1087 BitField<29, 3, u64> pred29;
1088 BitField<32, 1, u64> neg_pred29;
1089 BitField<39, 3, u64> pred39;
1090 BitField<42, 1, u64> neg_pred39;
1091 BitField<45, 2, PredOperation> op;
1092 } psetp;
1093
1094 union {
1095 BitField<43, 4, PredCondition> cond;
1096 BitField<45, 2, PredOperation> op;
1097 BitField<3, 3, u64> pred3;
1098 BitField<0, 3, u64> pred0;
1099 BitField<39, 3, u64> pred39;
1100 } vsetp;
1101
1102 union {
1103 BitField<12, 3, u64> pred12;
1104 BitField<15, 1, u64> neg_pred12;
1105 BitField<24, 2, PredOperation> cond;
1106 BitField<29, 3, u64> pred29;
1107 BitField<32, 1, u64> neg_pred29;
1108 BitField<39, 3, u64> pred39;
1109 BitField<42, 1, u64> neg_pred39;
1110 BitField<44, 1, u64> bf;
1111 BitField<45, 2, PredOperation> op;
1112 } pset;
1113
1114 union {
1115 BitField<0, 3, u64> pred0;
1116 BitField<3, 3, u64> pred3;
1117 BitField<8, 5, ConditionCode> cc; // flag in cc
1118 BitField<39, 3, u64> pred39;
1119 BitField<42, 1, u64> neg_pred39;
1120 BitField<45, 4, PredOperation> op; // op with pred39
1121 } csetp;
1122
1123 union {
1124 BitField<6, 1, u64> ftz;
1125 BitField<45, 2, PredOperation> op;
1126 BitField<3, 3, u64> pred3;
1127 BitField<0, 3, u64> pred0;
1128 BitField<43, 1, u64> negate_a;
1129 BitField<44, 1, u64> abs_a;
1130 BitField<47, 2, HalfType> type_a;
1131 union {
1132 BitField<35, 4, PredCondition> cond;
1133 BitField<49, 1, u64> h_and;
1134 BitField<31, 1, u64> negate_b;
1135 BitField<30, 1, u64> abs_b;
1136 BitField<28, 2, HalfType> type_b;
1137 } reg;
1138 union {
1139 BitField<56, 1, u64> negate_b;
1140 BitField<54, 1, u64> abs_b;
1141 } cbuf;
1142 union {
1143 BitField<49, 4, PredCondition> cond;
1144 BitField<53, 1, u64> h_and;
1145 } cbuf_and_imm;
1146 BitField<42, 1, u64> neg_pred;
1147 BitField<39, 3, u64> pred39;
1148 } hsetp2;
1149
1150 union {
1151 BitField<40, 1, R2pMode> mode;
1152 BitField<41, 2, u64> byte;
1153 BitField<20, 7, u64> immediate_mask;
1154 } p2r_r2p;
1155
1156 union {
1157 BitField<39, 3, u64> pred39;
1158 BitField<42, 1, u64> neg_pred;
1159 BitField<43, 1, u64> neg_a;
1160 BitField<44, 1, u64> abs_b;
1161 BitField<45, 2, PredOperation> op;
1162 BitField<48, 4, PredCondition> cond;
1163 BitField<52, 1, u64> bf;
1164 BitField<53, 1, u64> neg_b;
1165 BitField<54, 1, u64> abs_a;
1166 BitField<55, 1, u64> ftz;
1167 } fset;
1168
1169 union {
1170 BitField<47, 1, u64> ftz;
1171 BitField<48, 4, PredCondition> cond;
1172 } fcmp;
1173
1174 union {
1175 BitField<49, 1, u64> bf;
1176 BitField<35, 3, PredCondition> cond;
1177 BitField<50, 1, u64> ftz;
1178 BitField<45, 2, PredOperation> op;
1179 BitField<43, 1, u64> negate_a;
1180 BitField<44, 1, u64> abs_a;
1181 BitField<47, 2, HalfType> type_a;
1182 BitField<31, 1, u64> negate_b;
1183 BitField<30, 1, u64> abs_b;
1184 BitField<28, 2, HalfType> type_b;
1185 BitField<42, 1, u64> neg_pred;
1186 BitField<39, 3, u64> pred39;
1187 } hset2;
1188
1189 union {
1190 BitField<39, 3, u64> pred39;
1191 BitField<42, 1, u64> neg_pred;
1192 BitField<44, 1, u64> bf;
1193 BitField<45, 2, PredOperation> op;
1194 BitField<48, 1, u64> is_signed;
1195 BitField<49, 3, PredCondition> cond;
1196 } iset;
1197
1198 union {
1199 BitField<45, 1, u64> negate_a;
1200 BitField<49, 1, u64> abs_a;
1201 BitField<10, 2, Register::Size> src_size;
1202 BitField<13, 1, u64> is_input_signed;
1203 BitField<8, 2, Register::Size> dst_size;
1204 BitField<12, 1, u64> is_output_signed;
1205
1206 union {
1207 BitField<39, 2, u64> tab5cb8_2;
1208 } i2f;
1209
1210 union {
1211 BitField<39, 2, F2iRoundingOp> rounding;
1212 } f2i;
1213
1214 union {
1215 BitField<39, 4, u64> rounding;
1216 // H0, H1 extract for F16 missing
1217            BitField<41, 1, u64> selector; // Guessed, as some games set it; TODO: reverse engineer this value
1218 [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
1219 constexpr u64 rounding_mask = 0x0B;
1220 return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
1221 }
1222 } f2f;
1223
1224 union {
1225 BitField<41, 2, u64> selector;
1226 } int_src;
1227
1228 union {
1229 BitField<41, 1, u64> selector;
1230 } float_src;
1231 } conversion;
1232
1233 union {
1234 BitField<28, 1, u64> array;
1235 BitField<29, 2, TextureType> texture_type;
1236 BitField<31, 4, u64> component_mask;
1237 BitField<49, 1, u64> nodep_flag;
1238 BitField<50, 1, u64> dc_flag;
1239 BitField<54, 1, u64> aoffi_flag;
1240 BitField<55, 3, TextureProcessMode> process_mode;
1241
1242 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1243 return ((1ULL << component) & component_mask) != 0;
1244 }
1245
1246 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1247 return process_mode;
1248 }
1249
1250 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1251 switch (mode) {
1252 case TextureMiscMode::DC:
1253 return dc_flag != 0;
1254 case TextureMiscMode::NODEP:
1255 return nodep_flag != 0;
1256 case TextureMiscMode::AOFFI:
1257 return aoffi_flag != 0;
1258 default:
1259 break;
1260 }
1261 return false;
1262 }
1263 } tex;
1264
1265 union {
1266 BitField<28, 1, u64> array;
1267 BitField<29, 2, TextureType> texture_type;
1268 BitField<31, 4, u64> component_mask;
1269 BitField<49, 1, u64> nodep_flag;
1270 BitField<50, 1, u64> dc_flag;
1271 BitField<36, 1, u64> aoffi_flag;
1272 BitField<37, 3, TextureProcessMode> process_mode;
1273
1274 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1275 return ((1ULL << component) & component_mask) != 0;
1276 }
1277
1278 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1279 return process_mode;
1280 }
1281
1282 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1283 switch (mode) {
1284 case TextureMiscMode::DC:
1285 return dc_flag != 0;
1286 case TextureMiscMode::NODEP:
1287 return nodep_flag != 0;
1288 case TextureMiscMode::AOFFI:
1289 return aoffi_flag != 0;
1290 default:
1291 break;
1292 }
1293 return false;
1294 }
1295 } tex_b;
1296
1297 union {
1298 BitField<22, 6, TextureQueryType> query_type;
1299 BitField<31, 4, u64> component_mask;
1300 BitField<49, 1, u64> nodep_flag;
1301
1302 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1303 switch (mode) {
1304 case TextureMiscMode::NODEP:
1305 return nodep_flag != 0;
1306 default:
1307 break;
1308 }
1309 return false;
1310 }
1311
1312 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1313 return ((1ULL << component) & component_mask) != 0;
1314 }
1315 } txq;
1316
1317 union {
1318 BitField<28, 1, u64> array;
1319 BitField<29, 2, TextureType> texture_type;
1320 BitField<31, 4, u64> component_mask;
1321 BitField<35, 1, u64> ndv_flag;
1322 BitField<49, 1, u64> nodep_flag;
1323
1324 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1325 return ((1ULL << component) & component_mask) != 0;
1326 }
1327
1328 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1329 switch (mode) {
1330 case TextureMiscMode::NDV:
1331 return (ndv_flag != 0);
1332 case TextureMiscMode::NODEP:
1333 return (nodep_flag != 0);
1334 default:
1335 break;
1336 }
1337 return false;
1338 }
1339 } tmml;
1340
1341 union {
1342 BitField<28, 1, u64> array;
1343 BitField<29, 2, TextureType> texture_type;
1344 BitField<35, 1, u64> ndv_flag;
1345 BitField<49, 1, u64> nodep_flag;
1346 BitField<50, 1, u64> dc_flag;
1347 BitField<54, 2, u64> offset_mode;
1348 BitField<56, 2, u64> component;
1349
1350 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1351 switch (mode) {
1352 case TextureMiscMode::NDV:
1353 return ndv_flag != 0;
1354 case TextureMiscMode::NODEP:
1355 return nodep_flag != 0;
1356 case TextureMiscMode::DC:
1357 return dc_flag != 0;
1358 case TextureMiscMode::AOFFI:
1359 return offset_mode == 1;
1360 case TextureMiscMode::PTP:
1361 return offset_mode == 2;
1362 default:
1363 break;
1364 }
1365 return false;
1366 }
1367 } tld4;
1368
1369 union {
1370 BitField<35, 1, u64> ndv_flag;
1371 BitField<49, 1, u64> nodep_flag;
1372 BitField<50, 1, u64> dc_flag;
1373 BitField<33, 2, u64> offset_mode;
1374 BitField<37, 2, u64> component;
1375
1376 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1377 switch (mode) {
1378 case TextureMiscMode::NDV:
1379 return ndv_flag != 0;
1380 case TextureMiscMode::NODEP:
1381 return nodep_flag != 0;
1382 case TextureMiscMode::DC:
1383 return dc_flag != 0;
1384 case TextureMiscMode::AOFFI:
1385 return offset_mode == 1;
1386 case TextureMiscMode::PTP:
1387 return offset_mode == 2;
1388 default:
1389 break;
1390 }
1391 return false;
1392 }
1393 } tld4_b;
1394
1395 union {
1396 BitField<49, 1, u64> nodep_flag;
1397 BitField<50, 1, u64> dc_flag;
1398 BitField<51, 1, u64> aoffi_flag;
1399 BitField<52, 2, u64> component;
1400 BitField<55, 1, u64> fp16_flag;
1401
1402 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1403 switch (mode) {
1404 case TextureMiscMode::DC:
1405 return dc_flag != 0;
1406 case TextureMiscMode::NODEP:
1407 return nodep_flag != 0;
1408 case TextureMiscMode::AOFFI:
1409 return aoffi_flag != 0;
1410 default:
1411 break;
1412 }
1413 return false;
1414 }
1415 } tld4s;
1416
1417 union {
1418 BitField<0, 8, Register> gpr0;
1419 BitField<28, 8, Register> gpr28;
1420 BitField<49, 1, u64> nodep_flag;
1421 BitField<50, 3, u64> component_mask_selector;
1422 BitField<53, 4, u64> texture_info;
1423 BitField<59, 1, u64> fp32_flag;
1424
1425 [[nodiscard]] TextureType GetTextureType() const {
1426 // The TEXS instruction has a weird encoding for the texture type.
1427 if (texture_info == 0) {
1428 return TextureType::Texture1D;
1429 }
1430 if (texture_info >= 1 && texture_info <= 9) {
1431 return TextureType::Texture2D;
1432 }
1433 if (texture_info >= 10 && texture_info <= 11) {
1434 return TextureType::Texture3D;
1435 }
1436 if (texture_info >= 12 && texture_info <= 13) {
1437 return TextureType::TextureCube;
1438 }
1439
1440 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1441 UNREACHABLE();
1442 return TextureType::Texture1D;
1443 }
1444
1445 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1446 switch (texture_info) {
1447 case 0:
1448 case 2:
1449 case 6:
1450 case 8:
1451 case 9:
1452 case 11:
1453 return TextureProcessMode::LZ;
1454 case 3:
1455 case 5:
1456 case 13:
1457 return TextureProcessMode::LL;
1458 default:
1459 break;
1460 }
1461 return TextureProcessMode::None;
1462 }
1463
1464 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1465 switch (mode) {
1466 case TextureMiscMode::DC:
1467 return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
1468 case TextureMiscMode::NODEP:
1469 return nodep_flag != 0;
1470 default:
1471 break;
1472 }
1473 return false;
1474 }
1475
1476 [[nodiscard]] bool IsArrayTexture() const {
1477 // TEXS only supports Texture2D arrays.
1478 return texture_info >= 7 && texture_info <= 9;
1479 }
1480
1481 [[nodiscard]] bool HasTwoDestinations() const {
1482 return gpr28.Value() != Register::ZeroIndex;
1483 }
1484
1485 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1486 static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
1487 {},
1488 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
1489 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
1490 {0x7, 0xb, 0xd, 0xe, 0xf},
1491 }};
1492
1493 std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
1494 index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
1495
1496 u32 mask = mask_lut[index][component_mask_selector];
1497 // A mask of 0 means this instruction uses an unimplemented mask.
1498 ASSERT(mask != 0);
1499 return ((1ull << component) & mask) != 0;
1500 }
1501 } texs;
1502
1503 union {
1504 BitField<28, 1, u64> is_array;
1505 BitField<29, 2, TextureType> texture_type;
1506 BitField<35, 1, u64> aoffi;
1507 BitField<49, 1, u64> nodep_flag;
1508 BitField<50, 1, u64> ms; // Multisample?
1509 BitField<54, 1, u64> cl;
1510 BitField<55, 1, u64> process_mode;
1511
1512 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1513 return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
1514 }
1515 } tld;
1516
1517 union {
1518 BitField<49, 1, u64> nodep_flag;
1519 BitField<53, 4, u64> texture_info;
1520 BitField<59, 1, u64> fp32_flag;
1521
1522 [[nodiscard]] TextureType GetTextureType() const {
1523 // The TLDS instruction has a weird encoding for the texture type.
1524 if (texture_info <= 1) {
1525 return TextureType::Texture1D;
1526 }
1527 if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
1528 (texture_info >= 4 && texture_info <= 6)) {
1529 return TextureType::Texture2D;
1530 }
1531 if (texture_info == 7) {
1532 return TextureType::Texture3D;
1533 }
1534
1535 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1536 UNREACHABLE();
1537 return TextureType::Texture1D;
1538 }
1539
1540 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1541 if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
1542 return TextureProcessMode::LL;
1543 }
1544 return TextureProcessMode::LZ;
1545 }
1546
1547 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1548 switch (mode) {
1549 case TextureMiscMode::AOFFI:
1550 return texture_info == 12 || texture_info == 4;
1551 case TextureMiscMode::MZ:
1552 return texture_info == 5;
1553 case TextureMiscMode::NODEP:
1554 return nodep_flag != 0;
1555 default:
1556 break;
1557 }
1558 return false;
1559 }
1560
1561 [[nodiscard]] bool IsArrayTexture() const {
1562            // TLDS only supports Texture2D arrays.
1563 return texture_info == 8;
1564 }
1565 } tlds;
1566
1567 union {
1568 BitField<28, 1, u64> is_array;
1569 BitField<29, 2, TextureType> texture_type;
1570 BitField<35, 1, u64> aoffi_flag;
1571 BitField<49, 1, u64> nodep_flag;
1572
1573 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1574 switch (mode) {
1575 case TextureMiscMode::AOFFI:
1576 return aoffi_flag != 0;
1577 case TextureMiscMode::NODEP:
1578 return nodep_flag != 0;
1579 default:
1580 break;
1581 }
1582 return false;
1583 }
1584
1585 } txd;
1586
1587 union {
1588 BitField<24, 2, StoreCacheManagement> cache_management;
1589 BitField<33, 3, ImageType> image_type;
1590 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1591 BitField<51, 1, u64> is_immediate;
1592 BitField<52, 1, SurfaceDataMode> mode;
1593
1594 BitField<20, 3, StoreType> store_data_layout;
1595 BitField<20, 4, u64> component_mask_selector;
1596
1597 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1598 ASSERT(mode == SurfaceDataMode::P);
1599 constexpr u8 R = 0b0001;
1600 constexpr u8 G = 0b0010;
1601 constexpr u8 B = 0b0100;
1602 constexpr u8 A = 0b1000;
1603 constexpr std::array<u8, 16> mask = {
1604 0, (R), (G), (R | G), (B), (R | B),
1605 (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
1606 (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
1607 return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
1608 }
1609
1610 [[nodiscard]] StoreType GetStoreDataLayout() const {
1611 ASSERT(mode == SurfaceDataMode::D_BA);
1612 return store_data_layout;
1613 }
1614 } suldst;
1615
1616 union {
1617 BitField<28, 1, u64> is_ba;
1618 BitField<51, 3, ImageAtomicOperationType> operation_type;
1619 BitField<33, 3, ImageType> image_type;
1620 BitField<29, 4, ImageAtomicOperation> operation;
1621 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1622 } suatom_d;
1623
1624 union {
1625 BitField<20, 24, u64> target;
1626 BitField<5, 1, u64> constant_buffer;
1627
1628 [[nodiscard]] s32 GetBranchTarget() const {
1629 // Sign extend the branch target offset
1630 const auto mask = 1U << (24 - 1);
1631 const auto target_value = static_cast<u32>(target);
1632 constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
1633
1634 // The branch offset is relative to the next instruction and is stored in bytes, so
1635 // divide it by the size of an instruction and add 1 to it.
1636 return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
1637 }
1638 } bra;
1639
1640 union {
1641 BitField<20, 24, u64> target;
1642 BitField<5, 1, u64> constant_buffer;
1643
1644 [[nodiscard]] s32 GetBranchExtend() const {
1645 // Sign extend the branch target offset
1646 const auto mask = 1U << (24 - 1);
1647 const auto target_value = static_cast<u32>(target);
1648 constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
1649
1650 // The branch offset is relative to the next instruction and is stored in bytes, so
1651 // divide it by the size of an instruction and add 1 to it.
1652 return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
1653 }
1654 } brx;
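As a worked illustration of the sign-extension done by GetBranchTarget()/GetBranchExtend() above (SignExtend24 is a hypothetical helper name, not part of the original code):

    #include <cstdint>

    // Sign-extends a 24-bit two's-complement value stored in the low bits of 'raw'.
    constexpr std::int32_t SignExtend24(std::uint32_t raw) {
        constexpr std::uint32_t mask = 1U << 23; // sign bit of the 24-bit field
        return static_cast<std::int32_t>((raw ^ mask) - mask);
    }

    // Example: a raw target field of 0xFFFFF0 encodes -16 bytes. With 8-byte instructions the
    // decoded target is -16 / 8 + 1 = -1 instructions relative to the branch itself (the raw
    // offset is relative to the next instruction, hence the +1).
    static_assert(SignExtend24(0xFFFFF0) == -16);
    static_assert(SignExtend24(0xFFFFF0) / 8 + 1 == -1);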
1655
1656 union {
1657 BitField<39, 1, u64> emit; // EmitVertex
1658 BitField<40, 1, u64> cut; // EndPrimitive
1659 } out;
1660
1661 union {
1662 BitField<31, 1, u64> skew;
1663 BitField<32, 1, u64> o;
1664 BitField<33, 2, IsberdMode> mode;
1665 BitField<47, 2, IsberdShift> shift;
1666 } isberd;
1667
1668 union {
1669 BitField<8, 2, MembarType> type;
1670 BitField<0, 2, MembarUnknown> unknown;
1671 } membar;
1672
1673 union {
1674 BitField<48, 1, u64> signed_a;
1675 BitField<38, 1, u64> is_byte_chunk_a;
1676 BitField<36, 2, VideoType> type_a;
1677 BitField<36, 2, u64> byte_height_a;
1678
1679 BitField<49, 1, u64> signed_b;
1680 BitField<50, 1, u64> use_register_b;
1681 BitField<30, 1, u64> is_byte_chunk_b;
1682 BitField<28, 2, VideoType> type_b;
1683 BitField<28, 2, u64> byte_height_b;
1684 } video;
1685
1686 union {
1687 BitField<51, 2, VmadShr> shr;
1688 BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
1689 BitField<47, 1, u64> cc;
1690 } vmad;
1691
1692 union {
1693 BitField<54, 1, u64> is_dest_signed;
1694 BitField<48, 1, u64> is_src_a_signed;
1695 BitField<49, 1, u64> is_src_b_signed;
1696 BitField<37, 2, u64> src_format_a;
1697 BitField<29, 2, u64> src_format_b;
1698 BitField<56, 1, u64> mx;
1699 BitField<55, 1, u64> sat;
1700 BitField<36, 2, u64> selector_a;
1701 BitField<28, 2, u64> selector_b;
1702 BitField<50, 1, u64> is_op_b_register;
1703 BitField<51, 3, VmnmxOperation> operation;
1704
1705 [[nodiscard]] VmnmxType SourceFormatA() const {
1706 switch (src_format_a) {
1707 case 0b11:
1708 return VmnmxType::Bits32;
1709 case 0b10:
1710 return VmnmxType::Bits16;
1711 default:
1712 return VmnmxType::Bits8;
1713 }
1714 }
1715
1716 [[nodiscard]] VmnmxType SourceFormatB() const {
1717 switch (src_format_b) {
1718 case 0b11:
1719 return VmnmxType::Bits32;
1720 case 0b10:
1721 return VmnmxType::Bits16;
1722 default:
1723 return VmnmxType::Bits8;
1724 }
1725 }
1726 } vmnmx;
1727
1728 union {
1729 BitField<20, 16, u64> imm20_16;
1730 BitField<35, 1, u64> high_b_rr; // used on RR
1731 BitField<36, 1, u64> product_shift_left;
1732 BitField<37, 1, u64> merge_37;
1733 BitField<48, 1, u64> sign_a;
1734 BitField<49, 1, u64> sign_b;
1735 BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
1736 BitField<50, 3, XmadMode> mode;
1737 BitField<52, 1, u64> high_b;
1738 BitField<53, 1, u64> high_a;
1739 BitField<55, 1, u64> product_shift_left_second; // used on CR
1740 BitField<56, 1, u64> merge_56;
1741 } xmad;
1742
1743 union {
1744 BitField<20, 14, u64> shifted_offset;
1745 BitField<34, 5, u64> index;
1746
1747 [[nodiscard]] u64 GetOffset() const {
1748 return shifted_offset * 4;
1749 }
1750 } cbuf34;
1751
1752 union {
1753 BitField<20, 16, s64> offset;
1754 BitField<36, 5, u64> index;
1755
1756 [[nodiscard]] s64 GetOffset() const {
1757 return offset;
1758 }
1759 } cbuf36;
1760
1761 // Unsure about the size of this one.
1762 // It's always used with a gpr0, so any size should be fine.
1763 BitField<20, 8, SystemVariable> sys20;
1764
1765 BitField<47, 1, u64> generates_cc;
1766 BitField<61, 1, u64> is_b_imm;
1767 BitField<60, 1, u64> is_b_gpr;
1768 BitField<59, 1, u64> is_c_gpr;
1769 BitField<20, 24, s64> smem_imm;
1770 BitField<0, 5, ConditionCode> flow_condition_code;
1771
1772 Attribute attribute;
1773 Sampler sampler;
1774 Image image;
1775
1776 u64 value;
1777};
1778static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
1779static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
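A minimal sketch of how a BitField member above maps to raw bits (ExtractField is a hypothetical stand-in, not yuzu's actual common/bit_field.h implementation):

    #include <cstddef>
    #include <cstdint>

    // Extracts the unsigned field occupying 'Bits' bits starting at bit 'Position' of a raw
    // 64-bit instruction word.
    template <std::size_t Position, std::size_t Bits>
    constexpr std::uint64_t ExtractField(std::uint64_t raw) {
        return (raw >> Position) & ((std::uint64_t{1} << Bits) - 1);
    }

    // Example: 'sys20' above is declared as BitField<20, 8, SystemVariable>, so its numeric
    // value corresponds to ExtractField<20, 8>(instruction_word).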
1780
1781class OpCode {
1782public:
1783 enum class Id {
1784 KIL,
1785 SSY,
1786 SYNC,
1787 BRK,
1788 DEPBAR,
1789 VOTE,
1790 VOTE_VTG,
1791 SHFL,
1792 FSWZADD,
1793 BFE_C,
1794 BFE_R,
1795 BFE_IMM,
1796 BFI_RC,
1797 BFI_IMM_R,
1798 BRA,
1799 BRX,
1800 PBK,
1801 LD_A,
1802 LD_L,
1803 LD_S,
1804 LD_C,
1805 LD, // Load from generic memory
1806 LDG, // Load from global memory
1807 ST_A,
1808 ST_L,
1809 ST_S,
1810 ST, // Store in generic memory
1811 STG, // Store in global memory
1812 RED, // Reduction operation
1813 ATOM, // Atomic operation on global memory
1814 ATOMS, // Atomic operation on shared memory
1815 AL2P, // Transforms attribute memory into physical memory
1816 TEX,
1817 TEX_B, // Texture Load Bindless
1818 TXQ, // Texture Query
1819 TXQ_B, // Texture Query Bindless
1820 TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
1821 TLD, // Texture Load
1822 TLDS, // Texture Load with scalar/non-vec4 source/destinations
1823 TLD4, // Texture Gather 4
1824 TLD4_B, // Texture Gather 4 Bindless
1825        TLD4S,   // Texture Load 4 with scalar/non-vec4 source/destinations
1826 TMML_B, // Texture Mip Map Level
1827 TMML, // Texture Mip Map Level
1828        TXD,     // Texture Gradient/Load with Derivatives
1829        TXD_B,   // Texture Gradient/Load with Derivatives Bindless
1830 SUST, // Surface Store
1831 SULD, // Surface Load
1832 SUATOM, // Surface Atomic Operation
1833 EXIT,
1834 NOP,
1835 IPA,
1836 OUT_R, // Emit vertex/primitive
1837 ISBERD,
1838 BAR,
1839 MEMBAR,
1840 VMAD,
1841 VSETP,
1842 VMNMX,
1843 FFMA_IMM, // Fused Multiply and Add
1844 FFMA_CR,
1845 FFMA_RC,
1846 FFMA_RR,
1847 FADD_C,
1848 FADD_R,
1849 FADD_IMM,
1850 FADD32I,
1851 FMUL_C,
1852 FMUL_R,
1853 FMUL_IMM,
1854 FMUL32_IMM,
1855 IADD_C,
1856 IADD_R,
1857 IADD_IMM,
1858 IADD3_C, // Add 3 Integers
1859 IADD3_R,
1860 IADD3_IMM,
1861 IADD32I,
1862 ISCADD_C, // Scale and Add
1863 ISCADD_R,
1864 ISCADD_IMM,
1865 FLO_R,
1866 FLO_C,
1867 FLO_IMM,
1868 LEA_R1,
1869 LEA_R2,
1870 LEA_RZ,
1871 LEA_IMM,
1872 LEA_HI,
1873 HADD2_C,
1874 HADD2_R,
1875 HADD2_IMM,
1876 HMUL2_C,
1877 HMUL2_R,
1878 HMUL2_IMM,
1879 HFMA2_CR,
1880 HFMA2_RC,
1881 HFMA2_RR,
1882 HFMA2_IMM_R,
1883 HSETP2_C,
1884 HSETP2_R,
1885 HSETP2_IMM,
1886 HSET2_C,
1887 HSET2_R,
1888 HSET2_IMM,
1889 POPC_C,
1890 POPC_R,
1891 POPC_IMM,
1892 SEL_C,
1893 SEL_R,
1894 SEL_IMM,
1895 ICMP_RC,
1896 ICMP_R,
1897 ICMP_CR,
1898 ICMP_IMM,
1899 FCMP_RR,
1900 FCMP_RC,
1901 FCMP_IMMR,
1902 MUFU, // Multi-Function Operator
1903 RRO_C, // Range Reduction Operator
1904 RRO_R,
1905 RRO_IMM,
1906 F2F_C,
1907 F2F_R,
1908 F2F_IMM,
1909 F2I_C,
1910 F2I_R,
1911 F2I_IMM,
1912 I2F_C,
1913 I2F_R,
1914 I2F_IMM,
1915 I2I_C,
1916 I2I_R,
1917 I2I_IMM,
1918 LOP_C,
1919 LOP_R,
1920 LOP_IMM,
1921 LOP32I,
1922 LOP3_C,
1923 LOP3_R,
1924 LOP3_IMM,
1925 MOV_C,
1926 MOV_R,
1927 MOV_IMM,
1928 S2R,
1929 MOV32_IMM,
1930 SHL_C,
1931 SHL_R,
1932 SHL_IMM,
1933 SHR_C,
1934 SHR_R,
1935 SHR_IMM,
1936 SHF_RIGHT_R,
1937 SHF_RIGHT_IMM,
1938 SHF_LEFT_R,
1939 SHF_LEFT_IMM,
1940 FMNMX_C,
1941 FMNMX_R,
1942 FMNMX_IMM,
1943 IMNMX_C,
1944 IMNMX_R,
1945 IMNMX_IMM,
1946 FSETP_C, // Set Predicate
1947 FSETP_R,
1948 FSETP_IMM,
1949 FSET_C,
1950 FSET_R,
1951 FSET_IMM,
1952 ISETP_C,
1953 ISETP_IMM,
1954 ISETP_R,
1955 ISET_R,
1956 ISET_C,
1957 ISET_IMM,
1958 PSETP,
1959 PSET,
1960 CSETP,
1961 R2P_IMM,
1962 P2R_IMM,
1963 XMAD_IMM,
1964 XMAD_CR,
1965 XMAD_RC,
1966 XMAD_RR,
1967 };
1968
1969 enum class Type {
1970 Trivial,
1971 Arithmetic,
1972 ArithmeticImmediate,
1973 ArithmeticInteger,
1974 ArithmeticIntegerImmediate,
1975 ArithmeticHalf,
1976 ArithmeticHalfImmediate,
1977 Bfe,
1978 Bfi,
1979 Shift,
1980 Ffma,
1981 Hfma2,
1982 Flow,
1983 Synch,
1984 Warp,
1985 Memory,
1986 Texture,
1987 Image,
1988 FloatSet,
1989 FloatSetPredicate,
1990 IntegerSet,
1991 IntegerSetPredicate,
1992 HalfSet,
1993 HalfSetPredicate,
1994 PredicateSetPredicate,
1995 PredicateSetRegister,
1996 RegisterSetPredicate,
1997 Conversion,
1998 Video,
1999 Xmad,
2000 Unknown,
2001 };
2002
2003    /// Returns whether an opcode has an execution predicate field (i.e., whether it can be
2004    /// conditionally executed).
2005 [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
2006        // TODO(Subv): Add the rest of the unpredicated instructions.
2007 return opcode != Id::SSY && opcode != Id::PBK;
2008 }
2009
2010 class Matcher {
2011 public:
2012 constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
2013 : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
2014
2015 [[nodiscard]] constexpr const char* GetName() const {
2016 return name;
2017 }
2018
2019 [[nodiscard]] constexpr u16 GetMask() const {
2020 return mask;
2021 }
2022
2023 [[nodiscard]] constexpr Id GetId() const {
2024 return id;
2025 }
2026
2027 [[nodiscard]] constexpr Type GetType() const {
2028 return type;
2029 }
2030
2031 /**
2032 * Tests to see if the given instruction is the instruction this matcher represents.
2033 * @param instruction The instruction to test
2034 * @returns true if the given instruction matches.
2035 */
2036 [[nodiscard]] constexpr bool Matches(u16 instruction) const {
2037 return (instruction & mask) == expected;
2038 }
2039
2040 private:
2041 const char* name;
2042 u16 mask;
2043 u16 expected;
2044 Id id;
2045 Type type;
2046 };
2047
2048 using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
2049 [[nodiscard]] static DecodeResult Decode(Instruction instr) {
2050 static const auto table{GetDecodeTable()};
2051
2052 const auto matches_instruction = [instr](const auto& matcher) {
2053 return matcher.Matches(static_cast<u16>(instr.opcode));
2054 };
2055
2056 auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
2057 return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
2058 : std::nullopt;
2059 }
2060
2061private:
2062 struct Detail {
2063 private:
2064 static constexpr std::size_t opcode_bitsize = 16;
2065
2066 /**
2067 * Generates the mask and the expected value after masking from a given bitstring.
2068 * A '0' in a bitstring indicates that a zero must be present at that bit position.
2069 * A '1' in a bitstring indicates that a one must be present at that bit position.
2070 */
2071 [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
2072 u16 mask = 0, expect = 0;
2073 for (std::size_t i = 0; i < opcode_bitsize; i++) {
2074 const std::size_t bit_position = opcode_bitsize - i - 1;
2075 switch (bitstring[i]) {
2076 case '0':
2077 mask |= static_cast<u16>(1U << bit_position);
2078 break;
2079 case '1':
2080 expect |= static_cast<u16>(1U << bit_position);
2081 mask |= static_cast<u16>(1U << bit_position);
2082 break;
2083 default:
2084 // Ignore
2085 break;
2086 }
2087 }
2088 return std::make_pair(mask, expect);
2089 }
2090
2091 public:
2092 /// Creates a matcher that can match and parse instructions based on bitstring.
2093 [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
2094 Type type, const char* const name) {
2095 const auto [mask, expected] = GetMaskAndExpect(bitstring);
2096 return Matcher(name, mask, expected, op, type);
2097 }
2098 };
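As a concrete illustration of the bitstring scheme above (characters other than '0' and '1', such as '-', are don't-cares and are left out of the mask): the KIL pattern "111000110011----" yields mask = 0xFFF0 and expect = 0xE330, so Matcher::Matches reduces to a masked comparison:

    #include <cstdint>

    // Sketch: what Matches() computes for the KIL pattern "111000110011----".
    constexpr bool MatchesKil(std::uint16_t opcode_bits) {
        return (opcode_bits & 0xFFF0) == 0xE330;
    }
    static_assert(MatchesKil(0xE333));   // the '-' (don't-care) bits may take any value
    static_assert(!MatchesKil(0xE340));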
2099
2100 [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
2101 std::vector<Matcher> table = {
2102#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
2103 INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
2104 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
2105 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
2106 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
2107 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
2108 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
2109 INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
2110 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
2111 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
2112 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
2113 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
2114 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
2115 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
2116 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
2117 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
2118 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
2119 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
2120 INST("100-------------", Id::LD, Type::Memory, "LD"),
2121 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
2122 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
2123 INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
2124 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
2125 INST("101-------------", Id::ST, Type::Memory, "ST"),
2126 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
2127 INST("1110101111111---", Id::RED, Type::Memory, "RED"),
2128 INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
2129 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
2130 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
2131 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
2132 INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
2133 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
2134 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
2135 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
2136 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
2137 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
2138 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
2139 INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
2140 INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
2141 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
2142 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
2143 INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
2144 INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
2145 INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
2146 INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
2147 INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
2148 INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
2149 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
2150 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
2151 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
2152 INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
2153 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2154 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2155 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
2156 INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2157 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2158 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2159 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
2160 INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
2161 INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
2162 INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
2163 INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
2164 INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
2165 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
2166 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
2167 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
2168 INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
2169 INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
2170 INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
2171 INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
2172 INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
2173 INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
2174 INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
2175 INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
2176 INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
2177 INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
2178 INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
2179 INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
2180 INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
2181 INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
2182 INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
2183 INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
2184 INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
2185 INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
2186 INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
2187 INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
2188 INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
2189 INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
2190 INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
2191 INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
2192 INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
2193 INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
2194 INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
2195 INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
2196 INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
2197 INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
2198 INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
2199 INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
2200 INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
2201 INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
2202 INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
2203 INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
2204 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
2205 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
2206 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
2207 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2208 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2209 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2210 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2211 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2212 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2213 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2214 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2215 INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
2216 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
2217 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
2218 INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
2219 INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
2220 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
2221 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
2222 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
2223 INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
2224 INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
2225 INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
2226 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
2227 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
2228 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
2229 INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
2230 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
2231 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
2232 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
2233 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
2234 INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
2235 INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
2236 INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
2237 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
2238 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
2239 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
2240 INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
2241 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
2242 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
2243 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
2244 INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
2245 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
2246 INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
2247 INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
2248 INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
2249 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
2250 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
2251 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
2252 INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
2253 INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
2254 INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
2255 INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
2256 INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
2257 INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
2258 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
2259 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
2260 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
2261 INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
2262 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
2263 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
2264 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
2265 INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
2266 INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
2267 INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
2268 INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
2269 INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
2270 INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
2271 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
2272 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
2273 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
2274 INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
2275 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
2276 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
2277 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
2278 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
2279 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
2280 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
2281 INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
2282 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
2283 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
2284 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
2285 INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
2286 };
2287#undef INST
2288 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
2289 // If a matcher has more bits in its mask it is more specific, so it
2290 // should come first.
2291 return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
2292 });
2293
2294 return table;
2295 }
2296};
2297
2298} // namespace Tegra::Shader
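A brief usage sketch of the decoder above (raw_instruction is a placeholder for a 64-bit word fetched from guest shader memory; it assumes an Instruction can be default-constructed and filled through its 'value' member):

    Tegra::Shader::Instruction instr{};
    instr.value = raw_instruction;
    if (const auto decoded = Tegra::Shader::OpCode::Decode(instr)) {
        const auto& matcher = decoded->get();
        // e.g. matcher.GetId(), matcher.GetType(), matcher.GetName()
    }

The stable_sort by mask popcount matters because some patterns overlap: for example, every FFMA_CR encoding ("010010011-------") also satisfies the broader FSET_C pattern ("0100100---------"), so the matcher with more fixed bits has to be tested first.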
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
deleted file mode 100644
index e0d7b89c5..000000000
--- a/src/video_core/engines/shader_header.h
+++ /dev/null
@@ -1,158 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Tegra::Shader {
15
16enum class OutputTopology : u32 {
17 PointList = 1,
18 LineStrip = 6,
19 TriangleStrip = 7,
20};
21
22enum class PixelImap : u8 {
23 Unused = 0,
24 Constant = 1,
25 Perspective = 2,
26 ScreenLinear = 3,
27};
28
29// Documentation in:
30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
31struct Header {
32 union {
33 BitField<0, 5, u32> sph_type;
34 BitField<5, 5, u32> version;
35 BitField<10, 4, u32> shader_type;
36 BitField<14, 1, u32> mrt_enable;
37 BitField<15, 1, u32> kills_pixels;
38 BitField<16, 1, u32> does_global_store;
39 BitField<17, 4, u32> sass_version;
40 BitField<21, 5, u32> reserved;
41 BitField<26, 1, u32> does_load_or_store;
42 BitField<27, 1, u32> does_fp64;
43 BitField<28, 4, u32> stream_out_mask;
44 } common0;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_low_size;
48 BitField<24, 8, u32> per_patch_attribute_count;
49 } common1;
50
51 union {
52 BitField<0, 24, u32> shader_local_memory_high_size;
53 BitField<24, 8, u32> threads_per_input_primitive;
54 } common2;
55
56 union {
57 BitField<0, 24, u32> shader_local_memory_crs_size;
58 BitField<24, 4, OutputTopology> output_topology;
59 BitField<28, 4, u32> reserved;
60 } common3;
61
62 union {
63 BitField<0, 12, u32> max_output_vertices;
64 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
65 BitField<20, 4, u32> reserved;
66 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
67 } common4;
68
69 union {
70 struct {
71 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
72 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
73 INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
74 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
75 union {
76 BitField<0, 8, u16> clip_distances;
77 BitField<8, 1, u16> point_sprite_s;
78 BitField<9, 1, u16> point_sprite_t;
79 BitField<10, 1, u16> fog_coordinate;
80 BitField<12, 1, u16> tessellation_eval_point_u;
81 BitField<13, 1, u16> tessellation_eval_point_v;
82 BitField<14, 1, u16> instance_id;
83 BitField<15, 1, u16> vertex_id;
84 };
85 INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
86 INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
87 INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
88 INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
89 INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
90 INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
91 INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
92 INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
93 INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
94 } vtg;
95
96 struct {
97 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
98 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
99
100 union {
101 BitField<0, 2, PixelImap> x;
102 BitField<2, 2, PixelImap> y;
103 BitField<4, 2, PixelImap> z;
104 BitField<6, 2, PixelImap> w;
105 u8 raw;
106 } imap_generic_vector[32];
107
108 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
109 INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
110 INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
111 INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
112
113 struct {
114 u32 target;
115 union {
116 BitField<0, 1, u32> sample_mask;
117 BitField<1, 1, u32> depth;
118 BitField<2, 30, u32> reserved;
119 };
120 } omap;
121
122 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
123 const u32 bit = render_target * 4 + component;
124 return omap.target & (1 << bit);
125 }
126
127 PixelImap GetPixelImap(u32 attribute) const {
128 const auto get_index = [this, attribute](u32 index) {
129 return static_cast<PixelImap>(
130 (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
131 };
132
133 std::optional<PixelImap> result;
134 for (u32 component = 0; component < 4; ++component) {
135 const PixelImap index = get_index(component);
136 if (index == PixelImap::Unused) {
137 continue;
138 }
139 if (result && result != index) {
140 LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
141 }
142 result = index;
143 }
144 return result.value_or(PixelImap::Unused);
145 }
146 } ps;
147
148 std::array<u32, 0xF> raw;
149 };
150
151 u64 GetLocalMemorySize() const {
152 return (common1.shader_local_memory_low_size |
153 (common2.shader_local_memory_high_size << 24));
154 }
155};
156static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
157
158} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h
deleted file mode 100644
index 49ce5cde5..000000000
--- a/src/video_core/engines/shader_type.h
+++ /dev/null
@@ -1,21 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Tegra::Engines {
10
11enum class ShaderType : u32 {
12 Vertex = 0,
13 TesselationControl = 1,
14 TesselationEval = 2,
15 Geometry = 3,
16 Fragment = 4,
17 Compute = 5,
18};
19static constexpr std::size_t MaxShaderTypes = 6;
20
21} // namespace Tegra::Engines
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
deleted file mode 100644
index f058f2744..000000000
--- a/src/video_core/guest_driver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <limits>
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/guest_driver.h"
11
12namespace VideoCore {
13
14void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
15 if (texture_handler_size) {
16 return;
17 }
18 const std::size_t size = bound_offsets.size();
19 if (size < 2) {
20 return;
21 }
22 std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
23 u32 min_val = std::numeric_limits<u32>::max();
24 for (std::size_t i = 1; i < size; ++i) {
25 if (bound_offsets[i] == bound_offsets[i - 1]) {
26 continue;
27 }
28 const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
29 min_val = std::min(min_val, new_min);
30 }
31 if (min_val > 2) {
32 return;
33 }
34 texture_handler_size = min_texture_handler_size * min_val;
35}
36
37} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
deleted file mode 100644
index 21e569ba1..000000000
--- a/src/video_core/guest_driver.h
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace VideoCore {
13
14/**
15 * The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and to
16 * collect the information required by HLE methods that cannot be avoided, such as shader
17 * tracking, since those problems are undecidable (Entscheidungsprobleme) in general.
18 */
19class GuestDriverProfile {
20public:
21 explicit GuestDriverProfile() = default;
22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
23 : texture_handler_size{texture_handler_size_} {}
24
25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
26
27 u32 GetTextureHandlerSize() const {
28 return texture_handler_size.value_or(default_texture_handler_size);
29 }
30
31 bool IsTextureHandlerSizeKnown() const {
32 return texture_handler_size.has_value();
33 }
34
35private:
36    // Minimum texture handler size that any driver can use.
37 static constexpr u32 min_texture_handler_size = 4;
38
39    // This matches the Vulkan and OpenGL standards, but Nvidia GPUs can easily use 4 bytes instead.
40    // Thus, certain drivers may shrink the size.
41 static constexpr u32 default_texture_handler_size = 8;
42
43 std::optional<u32> texture_handler_size = default_texture_handler_size;
44};
45
46} // namespace VideoCore
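A hypothetical usage sketch of the deduction above (the offsets are illustrative): starting from an unknown handler size, distinct bound offsets {0, 2, 4} have a minimum adjacent stride of 2, so the size is deduced as min_texture_handler_size * 2 = 8; strides larger than 2 leave the size unknown.

    VideoCore::GuestDriverProfile profile{std::nullopt}; // size not yet known
    profile.DeduceTextureHandlerSize({0, 2, 4});         // minimum stride 2 -> 4 * 2 = 8
    // profile.IsTextureHandlerSizeKnown() == true
    // profile.GetTextureHandlerSize() == 8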
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index d2b9d5f2b..882eff880 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,7 +69,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
69 } else { 69 } else {
70 UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); 70 UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
71 } 71 }
72
73 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); 72 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
74 73
75 for (const auto& map : submapped_ranges) { 74 for (const auto& map : submapped_ranges) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 58014c1c3..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -11,7 +11,6 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/fermi_2d.h" 12#include "video_core/engines/fermi_2d.h"
13#include "video_core/gpu.h" 13#include "video_core/gpu.h"
14#include "video_core/guest_driver.h"
15 14
16namespace Tegra { 15namespace Tegra {
17class MemoryManager; 16class MemoryManager;
@@ -45,7 +44,7 @@ public:
45 virtual void Clear() = 0; 44 virtual void Clear() = 0;
46 45
47 /// Dispatches a compute shader invocation 46 /// Dispatches a compute shader invocation
48 virtual void DispatchCompute(GPUVAddr code_addr) = 0; 47 virtual void DispatchCompute() = 0;
49 48
50 /// Resets the counter of a query 49 /// Resets the counter of a query
51 virtual void ResetCounter(QueryType type) = 0; 50 virtual void ResetCounter(QueryType type) = 0;
@@ -136,18 +135,5 @@ public:
136 /// Initialize disk cached resources for the game being emulated 135 /// Initialize disk cached resources for the game being emulated
137 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 136 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
138 const DiskResourceLoadCallback& callback) {} 137 const DiskResourceLoadCallback& callback) {}
139
140 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
141 [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
142 return guest_driver_profile;
143 }
144
145 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
146 [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
147 return guest_driver_profile;
148 }
149
150private:
151 GuestDriverProfile guest_driver_profile{};
152}; 138};
153} // namespace VideoCore 139} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so predicates have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, since it treats -1 as true and 0 as false.
27
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
39constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
41char Swizzle(std::size_t component) {
42 static constexpr std::string_view SWIZZLE{"xyzw"};
43 return SWIZZLE.at(component);
44}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", topology);
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
141 return "POINTS";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
188class ARBDecompiler final {
189public:
190 explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
191 ShaderType stage_, std::string_view identifier);
192
193 std::string Code() const {
194 return shader_source;
195 }
196
197private:
198 void DefineGlobalMemory();
199
200 void DeclareHeader();
201 void DeclareVertex();
202 void DeclareGeometry();
203 void DeclareFragment();
204 void DeclareCompute();
205 void DeclareInputAttributes();
206 void DeclareOutputAttributes();
207 void DeclareLocalMemory();
208 void DeclareGlobalMemory();
209 void DeclareConstantBuffers();
210 void DeclareRegisters();
211 void DeclareTemporaries();
212 void DeclarePredicates();
213 void DeclareInternalFlags();
214
215 void InitializeVariables();
216
217 void DecompileAST();
218 void DecompileBranchMode();
219
220 void VisitAST(const ASTNode& node);
221 std::string VisitExpression(const Expr& node);
222
223 void VisitBlock(const NodeBlock& bb);
224
225 std::string Visit(const Node& node);
226
227 std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
228 std::string BuildAoffi(Operation);
229 std::string GlobalMemoryPointer(const GmemNode& gmem);
230 void Exit();
231
232 std::string Assign(Operation);
233 std::string Select(Operation);
234 std::string FClamp(Operation);
235 std::string FCastHalf0(Operation);
236 std::string FCastHalf1(Operation);
237 std::string FSqrt(Operation);
238 std::string FSwizzleAdd(Operation);
239 std::string HAdd2(Operation);
240 std::string HMul2(Operation);
241 std::string HFma2(Operation);
242 std::string HAbsolute(Operation);
243 std::string HNegate(Operation);
244 std::string HClamp(Operation);
245 std::string HCastFloat(Operation);
246 std::string HUnpack(Operation);
247 std::string HMergeF32(Operation);
248 std::string HMergeH0(Operation);
249 std::string HMergeH1(Operation);
250 std::string HPack2(Operation);
251 std::string LogicalAssign(Operation);
252 std::string LogicalPick2(Operation);
253 std::string LogicalAnd2(Operation);
254 std::string FloatOrdered(Operation);
255 std::string FloatUnordered(Operation);
256 std::string LogicalAddCarry(Operation);
257 std::string Texture(Operation);
258 std::string TextureGather(Operation);
259 std::string TextureQueryDimensions(Operation);
260 std::string TextureQueryLod(Operation);
261 std::string TexelFetch(Operation);
262 std::string TextureGradient(Operation);
263 std::string ImageLoad(Operation);
264 std::string ImageStore(Operation);
265 std::string Branch(Operation);
266 std::string BranchIndirect(Operation);
267 std::string PushFlowStack(Operation);
268 std::string PopFlowStack(Operation);
269 std::string Exit(Operation);
270 std::string Discard(Operation);
271 std::string EmitVertex(Operation);
272 std::string EndPrimitive(Operation);
273 std::string InvocationId(Operation);
274 std::string YNegate(Operation);
275 std::string ThreadId(Operation);
276 std::string ShuffleIndexed(Operation);
277 std::string Barrier(Operation);
278 std::string MemoryBarrierGroup(Operation);
279 std::string MemoryBarrierGlobal(Operation);
280
281 template <const std::string_view& op>
282 std::string Unary(Operation operation) {
283 std::string temporary = AllocTemporary();
284 AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
285 return temporary;
286 }
287
288 template <const std::string_view& op>
289 std::string Binary(Operation operation) {
290 std::string temporary = AllocTemporary();
291 AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
292 Visit(operation[1]));
293 return temporary;
294 }
295
296 template <const std::string_view& op>
297 std::string Trinary(Operation operation) {
298 std::string temporary = AllocTemporary();
299 AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
300 Visit(operation[1]), Visit(operation[2]));
301 return temporary;
302 }
303
304 template <const std::string_view& op, bool unordered>
305 std::string FloatComparison(Operation operation) {
306 std::string temporary = AllocTemporary();
307 AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
308 AddLine("MOV.S {}, 0;", temporary);
309 AddLine("MOV.S {} (NE.x), -1;", temporary);
310
311 const std::string op_a = Visit(operation[0]);
312 const std::string op_b = Visit(operation[1]);
313 if constexpr (unordered) {
314 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
315 AddLine("TRUNC.U.CC RC.x, RC.x;");
316 AddLine("MOV.S {} (NE.x), -1;", temporary);
317 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
318 AddLine("TRUNC.U.CC RC.x, RC.x;");
319 AddLine("MOV.S {} (NE.x), -1;", temporary);
320 } else if (op == SNE_F) {
321 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
322 AddLine("TRUNC.U.CC RC.x, RC.x;");
323 AddLine("MOV.S {} (NE.x), 0;", temporary);
324 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
325 AddLine("TRUNC.U.CC RC.x, RC.x;");
326 AddLine("MOV.S {} (NE.x), 0;", temporary);
327 }
328 return temporary;
329 }
330
331 template <const std::string_view& op, bool is_nan>
332 std::string HalfComparison(Operation operation) {
333 std::string tmp1 = AllocVectorTemporary();
334 const std::string tmp2 = AllocVectorTemporary();
335 const std::string op_a = Visit(operation[0]);
336 const std::string op_b = Visit(operation[1]);
337 AddLine("UP2H.F {}, {};", tmp1, op_a);
338 AddLine("UP2H.F {}, {};", tmp2, op_b);
339 AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
340 AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
341 AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
342 AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
343 AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
344 if constexpr (is_nan) {
345 AddLine("MOVC.F RC.x, {};", op_a);
346 AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
347 AddLine("MOVC.F RC.x, {};", op_b);
348 AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
349 }
350 return tmp1;
351 }
352
353 template <const std::string_view& op, const std::string_view& type>
354 std::string AtomicImage(Operation operation) {
355 const auto& meta = std::get<MetaImage>(operation.GetMeta());
356 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
357 const std::size_t num_coords = operation.GetOperandsCount();
358 const std::size_t num_values = meta.values.size();
359
360 const std::string coord = AllocVectorTemporary();
361 const std::string value = AllocVectorTemporary();
362 for (std::size_t i = 0; i < num_coords; ++i) {
363 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
364 }
365 for (std::size_t i = 0; i < num_values; ++i) {
366 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
367 }
368
369 AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
370 image_id, ImageType(meta.image.type));
371 return fmt::format("{}.x", coord);
372 }
373
374 template <const std::string_view& op, const std::string_view& type>
375 std::string Atomic(Operation operation) {
376 std::string temporary = AllocTemporary();
377 std::string address;
378 std::string_view opname;
379 bool robust = false;
380 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
381 address = GlobalMemoryPointer(*gmem);
382 opname = "ATOM";
383 robust = true;
384 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
385 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
386 opname = "ATOMS";
387 } else {
388 UNREACHABLE();
389 return "{0, 0, 0, 0}";
390 }
391 if (robust) {
392 AddLine("IF NE.x;");
393 }
394 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
395 if (robust) {
396 AddLine("ELSE;");
397 AddLine("MOV.S {}, 0;", temporary);
398 AddLine("ENDIF;");
399 }
400 return temporary;
401 }
402
403 template <char type>
404 std::string Negate(Operation operation) {
405 std::string temporary = AllocTemporary();
406 if constexpr (type == 'F') {
407 AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
408 } else {
409 AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
410 }
411 return temporary;
412 }
413
414 template <char type>
415 std::string Absolute(Operation operation) {
416 std::string temporary = AllocTemporary();
417 AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
418 return temporary;
419 }
420
421 template <char type>
422 std::string BitfieldInsert(Operation operation) {
423 const std::string temporary = AllocVectorTemporary();
424 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
425 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
426 AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
427 Visit(operation[0]));
428 return fmt::format("{}.x", temporary);
429 }
430
431 template <char type>
432 std::string BitfieldExtract(Operation operation) {
433 const std::string temporary = AllocVectorTemporary();
434 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
435 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
436 AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
437 return fmt::format("{}.x", temporary);
438 }
439
440 template <char swizzle>
441 std::string LocalInvocationId(Operation) {
442 return fmt::format("invocation.localid.{}", swizzle);
443 }
444
445 template <char swizzle>
446 std::string WorkGroupId(Operation) {
447 return fmt::format("invocation.groupid.{}", swizzle);
448 }
449
450 template <char c1, char c2>
451 std::string ThreadMask(Operation) {
452 return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
453 }
454
455 template <typename... Args>
456 void AddExpression(std::string_view text, Args&&... args) {
457 shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
458 }
459
460 template <typename... Args>
461 void AddLine(std::string_view text, Args&&... args) {
462 AddExpression(text, std::forward<Args>(args)...);
463 shader_source += '\n';
464 }
465
466 std::string AllocLongVectorTemporary() {
467 max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
468 return fmt::format("L{}", num_long_temporaries++);
469 }
470
471 std::string AllocLongTemporary() {
472 return fmt::format("{}.x", AllocLongVectorTemporary());
473 }
474
475 std::string AllocVectorTemporary() {
476 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
477 return fmt::format("T{}", num_temporaries++);
478 }
479
480 std::string AllocTemporary() {
481 return fmt::format("{}.x", AllocVectorTemporary());
482 }
483
484 void ResetTemporaries() noexcept {
485 num_temporaries = 0;
486 num_long_temporaries = 0;
487 }
488
489 const Device& device;
490 const ShaderIR& ir;
491 const Registry& registry;
492 const ShaderType stage;
493
494 std::size_t num_temporaries = 0;
495 std::size_t max_temporaries = 0;
496
497 std::size_t num_long_temporaries = 0;
498 std::size_t max_long_temporaries = 0;
499
500 std::map<GlobalMemoryBase, u32> global_memory_names;
501
502 std::string shader_source;
503
504 static constexpr std::string_view ADD_F32 = "ADD.F32";
505 static constexpr std::string_view ADD_S = "ADD.S";
506 static constexpr std::string_view ADD_U = "ADD.U";
507 static constexpr std::string_view MUL_F32 = "MUL.F32";
508 static constexpr std::string_view MUL_S = "MUL.S";
509 static constexpr std::string_view MUL_U = "MUL.U";
510 static constexpr std::string_view DIV_F32 = "DIV.F32";
511 static constexpr std::string_view DIV_S = "DIV.S";
512 static constexpr std::string_view DIV_U = "DIV.U";
513 static constexpr std::string_view MAD_F32 = "MAD.F32";
514 static constexpr std::string_view RSQ_F32 = "RSQ.F32";
515 static constexpr std::string_view COS_F32 = "COS.F32";
516 static constexpr std::string_view SIN_F32 = "SIN.F32";
517 static constexpr std::string_view EX2_F32 = "EX2.F32";
518 static constexpr std::string_view LG2_F32 = "LG2.F32";
519 static constexpr std::string_view SLT_F = "SLT.F32";
520 static constexpr std::string_view SLT_S = "SLT.S";
521 static constexpr std::string_view SLT_U = "SLT.U";
522 static constexpr std::string_view SEQ_F = "SEQ.F32";
523 static constexpr std::string_view SEQ_S = "SEQ.S";
524 static constexpr std::string_view SEQ_U = "SEQ.U";
525 static constexpr std::string_view SLE_F = "SLE.F32";
526 static constexpr std::string_view SLE_S = "SLE.S";
527 static constexpr std::string_view SLE_U = "SLE.U";
528 static constexpr std::string_view SGT_F = "SGT.F32";
529 static constexpr std::string_view SGT_S = "SGT.S";
530 static constexpr std::string_view SGT_U = "SGT.U";
531 static constexpr std::string_view SNE_F = "SNE.F32";
532 static constexpr std::string_view SNE_S = "SNE.S";
533 static constexpr std::string_view SNE_U = "SNE.U";
534 static constexpr std::string_view SGE_F = "SGE.F32";
535 static constexpr std::string_view SGE_S = "SGE.S";
536 static constexpr std::string_view SGE_U = "SGE.U";
537 static constexpr std::string_view AND_S = "AND.S";
538 static constexpr std::string_view AND_U = "AND.U";
539 static constexpr std::string_view TRUNC_F = "TRUNC.F";
540 static constexpr std::string_view TRUNC_S = "TRUNC.S";
541 static constexpr std::string_view TRUNC_U = "TRUNC.U";
542 static constexpr std::string_view SHL_S = "SHL.S";
543 static constexpr std::string_view SHL_U = "SHL.U";
544 static constexpr std::string_view SHR_S = "SHR.S";
545 static constexpr std::string_view SHR_U = "SHR.U";
546 static constexpr std::string_view OR_S = "OR.S";
547 static constexpr std::string_view OR_U = "OR.U";
548 static constexpr std::string_view XOR_S = "XOR.S";
549 static constexpr std::string_view XOR_U = "XOR.U";
550 static constexpr std::string_view NOT_S = "NOT.S";
551 static constexpr std::string_view NOT_U = "NOT.U";
552 static constexpr std::string_view BTC_S = "BTC.S";
553 static constexpr std::string_view BTC_U = "BTC.U";
554 static constexpr std::string_view BTFM_S = "BTFM.S";
555 static constexpr std::string_view BTFM_U = "BTFM.U";
556 static constexpr std::string_view ROUND_F = "ROUND.F";
557 static constexpr std::string_view CEIL_F = "CEIL.F";
558 static constexpr std::string_view FLR_F = "FLR.F";
559 static constexpr std::string_view I2F_S = "I2F.S";
560 static constexpr std::string_view I2F_U = "I2F.U";
561 static constexpr std::string_view MIN_F = "MIN.F";
562 static constexpr std::string_view MIN_S = "MIN.S";
563 static constexpr std::string_view MIN_U = "MIN.U";
564 static constexpr std::string_view MAX_F = "MAX.F";
565 static constexpr std::string_view MAX_S = "MAX.S";
566 static constexpr std::string_view MAX_U = "MAX.U";
567 static constexpr std::string_view MOV_U = "MOV.U";
568 static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
569 static constexpr std::string_view TGALL_U = "TGALL.U";
570 static constexpr std::string_view TGANY_U = "TGANY.U";
571 static constexpr std::string_view TGEQ_U = "TGEQ.U";
572 static constexpr std::string_view EXCH = "EXCH";
573 static constexpr std::string_view ADD = "ADD";
574 static constexpr std::string_view MIN = "MIN";
575 static constexpr std::string_view MAX = "MAX";
576 static constexpr std::string_view AND = "AND";
577 static constexpr std::string_view OR = "OR";
578 static constexpr std::string_view XOR = "XOR";
579 static constexpr std::string_view U32 = "U32";
580 static constexpr std::string_view S32 = "S32";
581
582 static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
583 using DecompilerType = std::string (ARBDecompiler::*)(Operation);
584 static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
585 &ARBDecompiler::Assign,
586
587 &ARBDecompiler::Select,
588
589 &ARBDecompiler::Binary<ADD_F32>,
590 &ARBDecompiler::Binary<MUL_F32>,
591 &ARBDecompiler::Binary<DIV_F32>,
592 &ARBDecompiler::Trinary<MAD_F32>,
593 &ARBDecompiler::Negate<'F'>,
594 &ARBDecompiler::Absolute<'F'>,
595 &ARBDecompiler::FClamp,
596 &ARBDecompiler::FCastHalf0,
597 &ARBDecompiler::FCastHalf1,
598 &ARBDecompiler::Binary<MIN_F>,
599 &ARBDecompiler::Binary<MAX_F>,
600 &ARBDecompiler::Unary<COS_F32>,
601 &ARBDecompiler::Unary<SIN_F32>,
602 &ARBDecompiler::Unary<EX2_F32>,
603 &ARBDecompiler::Unary<LG2_F32>,
604 &ARBDecompiler::Unary<RSQ_F32>,
605 &ARBDecompiler::FSqrt,
606 &ARBDecompiler::Unary<ROUND_F>,
607 &ARBDecompiler::Unary<FLR_F>,
608 &ARBDecompiler::Unary<CEIL_F>,
609 &ARBDecompiler::Unary<TRUNC_F>,
610 &ARBDecompiler::Unary<I2F_S>,
611 &ARBDecompiler::Unary<I2F_U>,
612 &ARBDecompiler::FSwizzleAdd,
613
614 &ARBDecompiler::Binary<ADD_S>,
615 &ARBDecompiler::Binary<MUL_S>,
616 &ARBDecompiler::Binary<DIV_S>,
617 &ARBDecompiler::Negate<'S'>,
618 &ARBDecompiler::Absolute<'S'>,
619 &ARBDecompiler::Binary<MIN_S>,
620 &ARBDecompiler::Binary<MAX_S>,
621
622 &ARBDecompiler::Unary<TRUNC_S>,
623 &ARBDecompiler::Unary<MOV_U>,
624 &ARBDecompiler::Binary<SHL_S>,
625 &ARBDecompiler::Binary<SHR_U>,
626 &ARBDecompiler::Binary<SHR_S>,
627 &ARBDecompiler::Binary<AND_S>,
628 &ARBDecompiler::Binary<OR_S>,
629 &ARBDecompiler::Binary<XOR_S>,
630 &ARBDecompiler::Unary<NOT_S>,
631 &ARBDecompiler::BitfieldInsert<'S'>,
632 &ARBDecompiler::BitfieldExtract<'S'>,
633 &ARBDecompiler::Unary<BTC_S>,
634 &ARBDecompiler::Unary<BTFM_S>,
635
636 &ARBDecompiler::Binary<ADD_U>,
637 &ARBDecompiler::Binary<MUL_U>,
638 &ARBDecompiler::Binary<DIV_U>,
639 &ARBDecompiler::Binary<MIN_U>,
640 &ARBDecompiler::Binary<MAX_U>,
641 &ARBDecompiler::Unary<TRUNC_U>,
642 &ARBDecompiler::Unary<MOV_U>,
643 &ARBDecompiler::Binary<SHL_U>,
644 &ARBDecompiler::Binary<SHR_U>,
645 &ARBDecompiler::Binary<SHR_U>,
646 &ARBDecompiler::Binary<AND_U>,
647 &ARBDecompiler::Binary<OR_U>,
648 &ARBDecompiler::Binary<XOR_U>,
649 &ARBDecompiler::Unary<NOT_U>,
650 &ARBDecompiler::BitfieldInsert<'U'>,
651 &ARBDecompiler::BitfieldExtract<'U'>,
652 &ARBDecompiler::Unary<BTC_U>,
653 &ARBDecompiler::Unary<BTFM_U>,
654
655 &ARBDecompiler::HAdd2,
656 &ARBDecompiler::HMul2,
657 &ARBDecompiler::HFma2,
658 &ARBDecompiler::HAbsolute,
659 &ARBDecompiler::HNegate,
660 &ARBDecompiler::HClamp,
661 &ARBDecompiler::HCastFloat,
662 &ARBDecompiler::HUnpack,
663 &ARBDecompiler::HMergeF32,
664 &ARBDecompiler::HMergeH0,
665 &ARBDecompiler::HMergeH1,
666 &ARBDecompiler::HPack2,
667
668 &ARBDecompiler::LogicalAssign,
669 &ARBDecompiler::Binary<AND_U>,
670 &ARBDecompiler::Binary<OR_U>,
671 &ARBDecompiler::Binary<XOR_U>,
672 &ARBDecompiler::Unary<NOT_U>,
673 &ARBDecompiler::LogicalPick2,
674 &ARBDecompiler::LogicalAnd2,
675
676 &ARBDecompiler::FloatComparison<SLT_F, false>,
677 &ARBDecompiler::FloatComparison<SEQ_F, false>,
678 &ARBDecompiler::FloatComparison<SLE_F, false>,
679 &ARBDecompiler::FloatComparison<SGT_F, false>,
680 &ARBDecompiler::FloatComparison<SNE_F, false>,
681 &ARBDecompiler::FloatComparison<SGE_F, false>,
682 &ARBDecompiler::FloatOrdered,
683 &ARBDecompiler::FloatUnordered,
684 &ARBDecompiler::FloatComparison<SLT_F, true>,
685 &ARBDecompiler::FloatComparison<SEQ_F, true>,
686 &ARBDecompiler::FloatComparison<SLE_F, true>,
687 &ARBDecompiler::FloatComparison<SGT_F, true>,
688 &ARBDecompiler::FloatComparison<SNE_F, true>,
689 &ARBDecompiler::FloatComparison<SGE_F, true>,
690
691 &ARBDecompiler::Binary<SLT_S>,
692 &ARBDecompiler::Binary<SEQ_S>,
693 &ARBDecompiler::Binary<SLE_S>,
694 &ARBDecompiler::Binary<SGT_S>,
695 &ARBDecompiler::Binary<SNE_S>,
696 &ARBDecompiler::Binary<SGE_S>,
697
698 &ARBDecompiler::Binary<SLT_U>,
699 &ARBDecompiler::Binary<SEQ_U>,
700 &ARBDecompiler::Binary<SLE_U>,
701 &ARBDecompiler::Binary<SGT_U>,
702 &ARBDecompiler::Binary<SNE_U>,
703 &ARBDecompiler::Binary<SGE_U>,
704
705 &ARBDecompiler::LogicalAddCarry,
706
707 &ARBDecompiler::HalfComparison<SLT_F, false>,
708 &ARBDecompiler::HalfComparison<SEQ_F, false>,
709 &ARBDecompiler::HalfComparison<SLE_F, false>,
710 &ARBDecompiler::HalfComparison<SGT_F, false>,
711 &ARBDecompiler::HalfComparison<SNE_F, false>,
712 &ARBDecompiler::HalfComparison<SGE_F, false>,
713 &ARBDecompiler::HalfComparison<SLT_F, true>,
714 &ARBDecompiler::HalfComparison<SEQ_F, true>,
715 &ARBDecompiler::HalfComparison<SLE_F, true>,
716 &ARBDecompiler::HalfComparison<SGT_F, true>,
717 &ARBDecompiler::HalfComparison<SNE_F, true>,
718 &ARBDecompiler::HalfComparison<SGE_F, true>,
719
720 &ARBDecompiler::Texture,
721 &ARBDecompiler::Texture,
722 &ARBDecompiler::TextureGather,
723 &ARBDecompiler::TextureQueryDimensions,
724 &ARBDecompiler::TextureQueryLod,
725 &ARBDecompiler::TexelFetch,
726 &ARBDecompiler::TextureGradient,
727
728 &ARBDecompiler::ImageLoad,
729 &ARBDecompiler::ImageStore,
730
731 &ARBDecompiler::AtomicImage<ADD, U32>,
732 &ARBDecompiler::AtomicImage<AND, U32>,
733 &ARBDecompiler::AtomicImage<OR, U32>,
734 &ARBDecompiler::AtomicImage<XOR, U32>,
735 &ARBDecompiler::AtomicImage<EXCH, U32>,
736
737 &ARBDecompiler::Atomic<EXCH, U32>,
738 &ARBDecompiler::Atomic<ADD, U32>,
739 &ARBDecompiler::Atomic<MIN, U32>,
740 &ARBDecompiler::Atomic<MAX, U32>,
741 &ARBDecompiler::Atomic<AND, U32>,
742 &ARBDecompiler::Atomic<OR, U32>,
743 &ARBDecompiler::Atomic<XOR, U32>,
744
745 &ARBDecompiler::Atomic<EXCH, S32>,
746 &ARBDecompiler::Atomic<ADD, S32>,
747 &ARBDecompiler::Atomic<MIN, S32>,
748 &ARBDecompiler::Atomic<MAX, S32>,
749 &ARBDecompiler::Atomic<AND, S32>,
750 &ARBDecompiler::Atomic<OR, S32>,
751 &ARBDecompiler::Atomic<XOR, S32>,
752
753 &ARBDecompiler::Atomic<ADD, U32>,
754 &ARBDecompiler::Atomic<MIN, U32>,
755 &ARBDecompiler::Atomic<MAX, U32>,
756 &ARBDecompiler::Atomic<AND, U32>,
757 &ARBDecompiler::Atomic<OR, U32>,
758 &ARBDecompiler::Atomic<XOR, U32>,
759
760 &ARBDecompiler::Atomic<ADD, S32>,
761 &ARBDecompiler::Atomic<MIN, S32>,
762 &ARBDecompiler::Atomic<MAX, S32>,
763 &ARBDecompiler::Atomic<AND, S32>,
764 &ARBDecompiler::Atomic<OR, S32>,
765 &ARBDecompiler::Atomic<XOR, S32>,
766
767 &ARBDecompiler::Branch,
768 &ARBDecompiler::BranchIndirect,
769 &ARBDecompiler::PushFlowStack,
770 &ARBDecompiler::PopFlowStack,
771 &ARBDecompiler::Exit,
772 &ARBDecompiler::Discard,
773
774 &ARBDecompiler::EmitVertex,
775 &ARBDecompiler::EndPrimitive,
776
777 &ARBDecompiler::InvocationId,
778 &ARBDecompiler::YNegate,
779 &ARBDecompiler::LocalInvocationId<'x'>,
780 &ARBDecompiler::LocalInvocationId<'y'>,
781 &ARBDecompiler::LocalInvocationId<'z'>,
782 &ARBDecompiler::WorkGroupId<'x'>,
783 &ARBDecompiler::WorkGroupId<'y'>,
784 &ARBDecompiler::WorkGroupId<'z'>,
785
786 &ARBDecompiler::Unary<TGBALLOT_U>,
787 &ARBDecompiler::Unary<TGALL_U>,
788 &ARBDecompiler::Unary<TGANY_U>,
789 &ARBDecompiler::Unary<TGEQ_U>,
790
791 &ARBDecompiler::ThreadId,
792 &ARBDecompiler::ThreadMask<'e', 'q'>,
793 &ARBDecompiler::ThreadMask<'g', 'e'>,
794 &ARBDecompiler::ThreadMask<'g', 't'>,
795 &ARBDecompiler::ThreadMask<'l', 'e'>,
796 &ARBDecompiler::ThreadMask<'l', 't'>,
797 &ARBDecompiler::ShuffleIndexed,
798
799 &ARBDecompiler::Barrier,
800 &ARBDecompiler::MemoryBarrierGroup,
801 &ARBDecompiler::MemoryBarrierGlobal,
802 };
803};
804
805ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
806 ShaderType stage_, std::string_view identifier)
807 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
808 DefineGlobalMemory();
809
810 AddLine("TEMP RC;");
811 AddLine("TEMP FSWZA[4];");
812 AddLine("TEMP FSWZB[4];");
813 if (ir.IsDecompiled()) {
814 DecompileAST();
815 } else {
816 DecompileBranchMode();
817 }
818 AddLine("END");
819
820 const std::string code = std::move(shader_source);
821 DeclareHeader();
822 DeclareVertex();
823 DeclareGeometry();
824 DeclareFragment();
825 DeclareCompute();
826 DeclareInputAttributes();
827 DeclareOutputAttributes();
828 DeclareLocalMemory();
829 DeclareGlobalMemory();
830 DeclareConstantBuffers();
831 DeclareRegisters();
832 DeclareTemporaries();
833 DeclarePredicates();
834 DeclareInternalFlags();
835
836 shader_source += code;
837}
838
839std::string_view HeaderStageName(ShaderType stage) {
840 switch (stage) {
841 case ShaderType::Vertex:
842 return "vp";
843 case ShaderType::Geometry:
844 return "gp";
845 case ShaderType::Fragment:
846 return "fp";
847 case ShaderType::Compute:
848 return "cp";
849 default:
850 UNREACHABLE();
851 return "";
852 }
853}
854
855void ARBDecompiler::DefineGlobalMemory() {
856 u32 binding = 0;
857 for (const auto& pair : ir.GetGlobalMemory()) {
858 const GlobalMemoryBase base = pair.first;
859 global_memory_names.emplace(base, binding);
860 ++binding;
861 }
862}
863
864void ARBDecompiler::DeclareHeader() {
865 AddLine("!!NV{}5.0", HeaderStageName(stage));
866 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
867 AddLine("OPTION NV_internal;");
868 AddLine("OPTION NV_gpu_program_fp64;");
869 AddLine("OPTION NV_shader_thread_group;");
870 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
871 AddLine("OPTION NV_shader_thread_shuffle;");
872 }
873 if (stage == ShaderType::Vertex) {
874 if (device.HasNvViewportArray2()) {
875 AddLine("OPTION NV_viewport_array2;");
876 }
877 }
878 if (stage == ShaderType::Fragment) {
879 AddLine("OPTION ARB_draw_buffers;");
880 }
881 if (device.HasImageLoadFormatted()) {
882 AddLine("OPTION EXT_shader_image_load_formatted;");
883 }
884}
885
886void ARBDecompiler::DeclareVertex() {
887 if (stage != ShaderType::Vertex) {
888 return;
889 }
890 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
891}
892
893void ARBDecompiler::DeclareGeometry() {
894 if (stage != ShaderType::Geometry) {
895 return;
896 }
897 const auto& info = registry.GetGraphicsInfo();
898 const auto& header = ir.GetHeader();
899 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
900 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
901 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
902 AddLine("ATTRIB vertex_position = vertex.position;");
903}
904
905void ARBDecompiler::DeclareFragment() {
906 if (stage != ShaderType::Fragment) {
907 return;
908 }
909 AddLine("OUTPUT result_color7 = result.color[7];");
910 AddLine("OUTPUT result_color6 = result.color[6];");
911 AddLine("OUTPUT result_color5 = result.color[5];");
912 AddLine("OUTPUT result_color4 = result.color[4];");
913 AddLine("OUTPUT result_color3 = result.color[3];");
914 AddLine("OUTPUT result_color2 = result.color[2];");
915 AddLine("OUTPUT result_color1 = result.color[1];");
916 AddLine("OUTPUT result_color0 = result.color;");
917}
918
919void ARBDecompiler::DeclareCompute() {
920 if (stage != ShaderType::Compute) {
921 return;
922 }
923 const ComputeInfo& info = registry.GetComputeInfo();
924 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
925 info.workgroup_size[2]);
926 if (info.shared_memory_size_in_words == 0) {
927 return;
928 }
929 const u32 limit = device.GetMaxComputeSharedMemorySize();
930 u32 size_in_bytes = info.shared_memory_size_in_words * 4;
931 if (size_in_bytes > limit) {
932 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
933 size_in_bytes, limit);
934 size_in_bytes = limit;
935 }
936
937 AddLine("SHARED_MEMORY {};", size_in_bytes);
938 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
939}
940
941void ARBDecompiler::DeclareInputAttributes() {
942 if (stage == ShaderType::Compute) {
943 return;
944 }
945 const std::string_view stage_name = StageInputName(stage);
946 for (const auto attribute : ir.GetInputAttributes()) {
947 if (!IsGenericAttribute(attribute)) {
948 continue;
949 }
950 const u32 index = GetGenericAttributeIndex(attribute);
951
952 std::string_view suffix;
953 if (stage == ShaderType::Fragment) {
954 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
955 if (input_mode == PixelImap::Unused) {
956 return;
957 }
958 suffix = GetInputFlags(input_mode);
959 }
960 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
961 index);
962 }
963}
964
965void ARBDecompiler::DeclareOutputAttributes() {
966 if (stage == ShaderType::Compute) {
967 return;
968 }
969 for (const auto attribute : ir.GetOutputAttributes()) {
970 if (!IsGenericAttribute(attribute)) {
971 continue;
972 }
973 const u32 index = GetGenericAttributeIndex(attribute);
974 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
975 }
976}
977
978void ARBDecompiler::DeclareLocalMemory() {
979 u64 size = 0;
980 if (stage == ShaderType::Compute) {
981 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
982 } else {
983 size = ir.GetHeader().GetLocalMemorySize();
984 }
985 if (size == 0) {
986 return;
987 }
988 const u64 element_count = Common::AlignUp(size, 4) / 4;
989 AddLine("TEMP lmem[{}];", element_count);
990}
991
992void ARBDecompiler::DeclareGlobalMemory() {
993 const size_t num_entries = ir.GetGlobalMemory().size();
994 if (num_entries > 0) {
995 AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
996 }
997}
998
999void ARBDecompiler::DeclareConstantBuffers() {
1000 u32 binding = 0;
1001 for (const auto& cbuf : ir.GetConstantBuffers()) {
1002 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
1003 ++binding;
1004 }
1005}
1006
1007void ARBDecompiler::DeclareRegisters() {
1008 for (const u32 gpr : ir.GetRegisters()) {
1009 AddLine("TEMP R{};", gpr);
1010 }
1011}
1012
1013void ARBDecompiler::DeclareTemporaries() {
1014 for (std::size_t i = 0; i < max_temporaries; ++i) {
1015 AddLine("TEMP T{};", i);
1016 }
1017 for (std::size_t i = 0; i < max_long_temporaries; ++i) {
1018 AddLine("LONG TEMP L{};", i);
1019 }
1020}
1021
1022void ARBDecompiler::DeclarePredicates() {
1023 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1024 AddLine("TEMP P{};", static_cast<u64>(pred));
1025 }
1026}
1027
1028void ARBDecompiler::DeclareInternalFlags() {
1029 for (const char* name : INTERNAL_FLAG_NAMES) {
1030 AddLine("TEMP {};", name);
1031 }
1032}
1033
1034void ARBDecompiler::InitializeVariables() {
1035 AddLine("MOV.F32 FSWZA[0], -1;");
1036 AddLine("MOV.F32 FSWZA[1], 1;");
1037 AddLine("MOV.F32 FSWZA[2], -1;");
1038 AddLine("MOV.F32 FSWZA[3], 0;");
1039 AddLine("MOV.F32 FSWZB[0], -1;");
1040 AddLine("MOV.F32 FSWZB[1], -1;");
1041 AddLine("MOV.F32 FSWZB[2], 1;");
1042 AddLine("MOV.F32 FSWZB[3], -1;");
1043
1044 if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
1045 AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
1046 }
1047 for (const auto attribute : ir.GetOutputAttributes()) {
1048 if (!IsGenericAttribute(attribute)) {
1049 continue;
1050 }
1051 const u32 index = GetGenericAttributeIndex(attribute);
1052 AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
1053 }
1054 for (const u32 gpr : ir.GetRegisters()) {
1055 AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
1056 }
1057 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1058 AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
1059 }
1060}
1061
1062void ARBDecompiler::DecompileAST() {
1063 const u32 num_flow_variables = ir.GetASTNumVariables();
1064 for (u32 i = 0; i < num_flow_variables; ++i) {
1065 AddLine("TEMP F{};", i);
1066 }
1067 for (u32 i = 0; i < num_flow_variables; ++i) {
1068 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1069 }
1070
1071 InitializeVariables();
1072
1073 VisitAST(ir.GetASTProgram());
1074}
1075
1076void ARBDecompiler::DecompileBranchMode() {
1077 static constexpr u32 FLOW_STACK_SIZE = 20;
1078 if (!ir.IsFlowStackDisabled()) {
1079 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
1080 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
1081 AddLine("TEMP SSY_TOP;");
1082 AddLine("TEMP PBK_TOP;");
1083 }
1084
1085 AddLine("TEMP PC;");
1086
1087 if (!ir.IsFlowStackDisabled()) {
1088 AddLine("MOV.U SSY_TOP.x, 0;");
1089 AddLine("MOV.U PBK_TOP.x, 0;");
1090 }
1091
1092 InitializeVariables();
1093
1094 const auto basic_block_end = ir.GetBasicBlocks().end();
1095 auto basic_block_it = ir.GetBasicBlocks().begin();
1096 const u32 first_address = basic_block_it->first;
1097 AddLine("MOV.U PC.x, {};", first_address);
1098
1099 AddLine("REP;");
1100
1101 std::size_t num_blocks = 0;
1102 while (basic_block_it != basic_block_end) {
1103 const auto& [address, bb] = *basic_block_it;
1104 ++num_blocks;
1105
1106 AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
1107 AddLine("IF NE.x;");
1108
1109 VisitBlock(bb);
1110
1111 ++basic_block_it;
1112
1113 if (basic_block_it != basic_block_end) {
1114 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
1115 if (!op || op->GetCode() != OperationCode::Branch) {
1116 const u32 next_address = basic_block_it->first;
1117 AddLine("MOV.U PC.x, {};", next_address);
1118 AddLine("CONT;");
1119 }
1120 }
1121
1122 AddLine("ELSE;");
1123 }
1124 AddLine("RET;");
1125 while (num_blocks--) {
1126 AddLine("ENDIF;");
1127 }
1128
1129 AddLine("ENDREP;");
1130}
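// A minimal sketch of the GLASM skeleton emitted above for a two-block program, with illustrative
// addresses; each block is guarded by comparing PC against its address, blocks are chained with
// ELSE, and the final ELSE falls through to RET:
//
//     MOV.U PC.x, <entry_address>;
//     REP;
//     SEQ.S.CC RC.x, PC.x, <address_0>; IF NE.x;
//       <block 0>  MOV.U PC.x, <address_1>; CONT;   (the fallthrough is omitted when block 0 ends in a branch)
//     ELSE;
//     SEQ.S.CC RC.x, PC.x, <address_1>; IF NE.x;
//       <block 1>
//     ELSE;
//     RET;
//     ENDIF; ENDIF;
//     ENDREP;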
1131
1132void ARBDecompiler::VisitAST(const ASTNode& node) {
1133 if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
1134 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1135 VisitAST(current);
1136 }
1137 } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
1138 const std::string condition = VisitExpression(if_then->condition);
1139 ResetTemporaries();
1140
1141 AddLine("MOVC.U RC.x, {};", condition);
1142 AddLine("IF NE.x;");
1143 for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
1144 VisitAST(current);
1145 }
1146 AddLine("ENDIF;");
1147 } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
1148 AddLine("ELSE;");
1149 for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
1150 VisitAST(current);
1151 }
1152 } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1153 VisitBlock(decoded->nodes);
1154 } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1155 AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
1156 ResetTemporaries();
1157 } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1158 const std::string condition = VisitExpression(do_while->condition);
1159 ResetTemporaries();
1160 AddLine("REP;");
1161 for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
1162 VisitAST(current);
1163 }
1164 AddLine("MOVC.U RC.x, {};", condition);
1165 AddLine("BRK (NE.x);");
1166 AddLine("ENDREP;");
1167 } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
1168 const bool is_true = ExprIsTrue(ast_return->condition);
1169 if (!is_true) {
1170 AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
1171 AddLine("IF NE.x;");
1172 ResetTemporaries();
1173 }
1174 if (ast_return->kills) {
1175 AddLine("KIL TR;");
1176 } else {
1177 Exit();
1178 }
1179 if (!is_true) {
1180 AddLine("ENDIF;");
1181 }
1182 } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
1183 if (ExprIsTrue(ast_break->condition)) {
1184 AddLine("BRK;");
1185 } else {
1186 AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
1187 AddLine("BRK (NE.x);");
1188 ResetTemporaries();
1189 }
1190 } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
1191 // Nothing to do
1192 } else {
1193 UNREACHABLE();
1194 }
1195}
1196
1197std::string ARBDecompiler::VisitExpression(const Expr& node) {
1198 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1199 std::string result = AllocTemporary();
1200 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1201 VisitExpression(expr->operand2));
1202 return result;
1203 }
1204 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1205 std::string result = AllocTemporary();
1206 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1207 VisitExpression(expr->operand2));
1208 return result;
1209 }
1210 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1211 std::string result = AllocTemporary();
1212 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1213 return result;
1214 }
1215 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1216 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1217 }
1218 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1219 return Visit(ir.GetConditionCode(expr->cc));
1220 }
1221 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1222 return fmt::format("F{}.x", expr->var_index);
1223 }
1224 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1225 return expr->value ? "0xffffffff" : "0";
1226 }
1227 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1228 std::string result = AllocTemporary();
1229 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1230 return result;
1231 }
1232 UNREACHABLE();
1233 return "0";
1234}
1235
1236void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1237 for (const auto& node : bb) {
1238 Visit(node);
1239 }
1240}
1241
1242std::string ARBDecompiler::Visit(const Node& node) {
1243 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1244 if (const auto amend_index = operation->GetAmendIndex()) {
1245 Visit(ir.GetAmendNode(*amend_index));
1246 }
1247 const std::size_t index = static_cast<std::size_t>(operation->GetCode());
1248 if (index >= OPERATION_DECOMPILERS.size()) {
1249 UNREACHABLE_MSG("Out of bounds operation: {}", index);
1250 return {};
1251 }
1252 const auto decompiler = OPERATION_DECOMPILERS[index];
1253 if (decompiler == nullptr) {
1254 UNREACHABLE_MSG("Undefined operation: {}", index);
1255 return {};
1256 }
1257 return (this->*decompiler)(*operation);
1258 }
1259
1260 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1261 const u32 index = gpr->GetIndex();
1262 if (index == Register::ZeroIndex) {
1263 return "{0, 0, 0, 0}.x";
1264 }
1265 return fmt::format("R{}.x", index);
1266 }
1267
1268 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1269 return fmt::format("CV{}.x", cv->GetIndex());
1270 }
1271
1272 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1273 std::string temporary = AllocTemporary();
1274 AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
1275 return temporary;
1276 }
1277
1278 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1279 std::string temporary = AllocTemporary();
1280 switch (const auto index = predicate->GetIndex(); index) {
1281 case Tegra::Shader::Pred::UnusedIndex:
1282 AddLine("MOV.S {}, -1;", temporary);
1283 break;
1284 case Tegra::Shader::Pred::NeverExecute:
1285 AddLine("MOV.S {}, 0;", temporary);
1286 break;
1287 default:
1288 AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
1289 break;
1290 }
1291 if (predicate->IsNegated()) {
1292 AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
1293 }
1294 return temporary;
1295 }
1296
1297 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1298 if (abuf->IsPhysicalBuffer()) {
1299 UNIMPLEMENTED_MSG("Physical buffers are not implemented");
1300 return "{0, 0, 0, 0}.x";
1301 }
1302
1303 const Attribute::Index index = abuf->GetIndex();
1304 const u32 element = abuf->GetElement();
1305 const char swizzle = Swizzle(element);
1306 switch (index) {
1307 case Attribute::Index::Position: {
1308 if (stage == ShaderType::Geometry) {
1309 return fmt::format("{}_position[{}].{}", StageInputName(stage),
1310 Visit(abuf->GetBuffer()), swizzle);
1311 } else {
1312 return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
1313 }
1314 }
1315 case Attribute::Index::TessCoordInstanceIDVertexID:
1316 ASSERT(stage == ShaderType::Vertex);
1317 switch (element) {
1318 case 2:
1319 return "vertex.instance";
1320 case 3:
1321 return "vertex.id";
1322 }
1323 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1324 break;
1325 case Attribute::Index::PointCoord:
1326 switch (element) {
1327 case 0:
1328 return "fragment.pointcoord.x";
1329 case 1:
1330 return "fragment.pointcoord.y";
1331 }
1332 UNIMPLEMENTED();
1333 break;
1334 case Attribute::Index::FrontFacing: {
1335 ASSERT(stage == ShaderType::Fragment);
1336 ASSERT(element == 3);
1337 const std::string temporary = AllocVectorTemporary();
1338 AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
1339 AddLine("MOV.U.CC RC.x, -RC;");
1340 AddLine("MOV.S {}.x, 0;", temporary);
1341 AddLine("MOV.S {}.x (NE.x), -1;", temporary);
1342 return fmt::format("{}.x", temporary);
1343 }
1344 default:
1345 if (IsGenericAttribute(index)) {
1346 if (stage == ShaderType::Geometry) {
1347 return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
1348 Visit(abuf->GetBuffer()), swizzle);
1349 } else {
1350 return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
1351 GetGenericAttributeIndex(index), swizzle);
1352 }
1353 }
1354 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
1355 break;
1356 }
1357 return "{0, 0, 0, 0}.x";
1358 }
1359
1360 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1361 std::string offset_string;
1362 const auto& offset = cbuf->GetOffset();
1363 if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
1364 offset_string = std::to_string(imm->GetValue());
1365 } else {
1366 offset_string = Visit(offset);
1367 }
1368 std::string temporary = AllocTemporary();
1369 AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
1370 return temporary;
1371 }
1372
1373 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1374 std::string temporary = AllocTemporary();
1375 AddLine("MOV {}, 0;", temporary);
1376 AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
1377 return temporary;
1378 }
1379
1380 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1381 std::string temporary = Visit(lmem->GetAddress());
1382 AddLine("SHR.U {}, {}, 2;", temporary, temporary);
1383 AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
1384 return temporary;
1385 }
1386
1387 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1388 std::string temporary = Visit(smem->GetAddress());
1389 AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
1390 return temporary;
1391 }
1392
1393 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1394 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1395 return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1396 }
1397
1398 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1399 if (const auto amend_index = conditional->GetAmendIndex()) {
1400 Visit(ir.GetAmendNode(*amend_index));
1401 }
1402 AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
1403 AddLine("IF NE.x;");
1404 VisitBlock(conditional->GetCode());
1405 AddLine("ENDIF;");
1406 return {};
1407 }
1408
1409 if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
1410 // Uncommenting this will generate invalid code. GLASM lacks comments.
1411 // AddLine("// {}", cmt->GetText());
1412 return {};
1413 }
1414
1415 UNIMPLEMENTED();
1416 return {};
1417}
1418
1419std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
1420 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1421 UNIMPLEMENTED_IF(meta.sampler.is_indexed);
1422
1423 const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
1424 meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
1425 const std::size_t count = operation.GetOperandsCount();
1426 std::string temporary = AllocVectorTemporary();
1427 std::size_t i = 0;
1428 for (; i < count; ++i) {
1429 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1430 }
1431 if (meta.sampler.is_array) {
1432 AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
1433 ++i;
1434 }
1435 if (meta.sampler.is_shadow) {
1436 std::string compare = Visit(meta.depth_compare);
1437 if (is_extended) {
1438 ASSERT(i == 4);
1439 std::string extra_coord = AllocVectorTemporary();
1440 AddLine("MOV.F {}.x, {};", extra_coord, compare);
1441 return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
1442 }
1443 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
1444 ++i;
1445 }
1446 return {temporary, temporary, i};
1447}
1448
1449std::string ARBDecompiler::BuildAoffi(Operation operation) {
1450 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1451 if (meta.aoffi.empty()) {
1452 return {};
1453 }
1454 const std::string temporary = AllocVectorTemporary();
1455 std::size_t i = 0;
1456 for (auto& node : meta.aoffi) {
1457 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1458 }
1459 return fmt::format(", offset({})", temporary);
1460}
1461
1462std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
1463 // Read a bindless SSBO, return its address and set CC accordingly
1464 // address = c[binding].xy
1465 // length = c[binding].z
1466 const u32 binding = global_memory_names.at(gmem.GetDescriptor());
1467
1468 const std::string pointer = AllocLongVectorTemporary();
1469 std::string temporary = AllocTemporary();
1470
1471 AddLine("PK64.U {}, c[{}];", pointer, binding);
1472 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
1473 Visit(gmem.GetBaseAddress()));
1474 AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
1475 AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
1476 // Compare offset to length and set CC
1477 AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
1478 return fmt::format("{}.x", pointer);
1479}
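// Callers pair the returned pointer with the CC result of the SLT.U.CC above to keep global
// memory accesses robust; a minimal sketch of the guarded load pattern used for GmemNode reads
// (and mirrored by the ATOM and STORE paths):
//
//     MOV dst, 0;                     # default result when the offset exceeds the SSBO length
//     LOAD.U32 dst (NE.x), pointer;   # executes only while CC still holds offset < length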
1480
1481void ARBDecompiler::Exit() {
1482 if (stage != ShaderType::Fragment) {
1483 AddLine("RET;");
1484 return;
1485 }
1486
1487 const auto safe_get_register = [this](u32 reg) -> std::string {
1488 if (ir.GetRegisters().contains(reg)) {
1489 return fmt::format("R{}.x", reg);
1490 }
1491 return "{0, 0, 0, 0}.x";
1492 };
1493
1494 const auto& header = ir.GetHeader();
1495 u32 current_reg = 0;
1496 for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
1497 for (u32 component = 0; component < 4; ++component) {
1498 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
1499 continue;
1500 }
1501 AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
1502 safe_get_register(current_reg));
1503 ++current_reg;
1504 }
1505 }
1506 if (header.ps.omap.depth) {
1507 AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
1508 }
1509
1510 AddLine("RET;");
1511}
1512
1513std::string ARBDecompiler::Assign(Operation operation) {
1514 const Node& dest = operation[0];
1515 const Node& src = operation[1];
1516
1517 std::string dest_name;
1518 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1519 if (gpr->GetIndex() == Register::ZeroIndex) {
1520 // Writing to Register::ZeroIndex is a no-op
1521 return {};
1522 }
1523 dest_name = fmt::format("R{}.x", gpr->GetIndex());
1524 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1525 const u32 element = abuf->GetElement();
1526 const char swizzle = Swizzle(element);
1527 switch (const Attribute::Index index = abuf->GetIndex()) {
1528 case Attribute::Index::Position:
1529 dest_name = fmt::format("result.position.{}", swizzle);
1530 break;
1531 case Attribute::Index::LayerViewportPointSize:
1532 switch (element) {
1533 case 0:
1534 UNIMPLEMENTED();
1535 return {};
1536 case 1:
1537 case 2:
1538 if (!device.HasNvViewportArray2()) {
1539 LOG_ERROR(
1540 Render_OpenGL,
1541 "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
1542 return {};
1543 }
1544 dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
1545 break;
1546 case 3:
1547 dest_name = "result.pointsize.x";
1548 break;
1549 }
1550 break;
1551 case Attribute::Index::ClipDistances0123:
1552 dest_name = fmt::format("result.clip[{}].x", element);
1553 break;
1554 case Attribute::Index::ClipDistances4567:
1555 dest_name = fmt::format("result.clip[{}].x", element + 4);
1556 break;
1557 default:
1558 if (!IsGenericAttribute(index)) {
1559 UNREACHABLE();
1560 return {};
1561 }
1562 dest_name =
1563 fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
1564 break;
1565 }
1566 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1567 const std::string address = Visit(lmem->GetAddress());
1568 AddLine("SHR.U {}, {}, 2;", address, address);
1569 dest_name = fmt::format("lmem[{}].x", address);
1570 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1571 AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
1572 ResetTemporaries();
1573 return {};
1574 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1575 AddLine("IF NE.x;");
1576 AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
1577 AddLine("ENDIF;");
1578 ResetTemporaries();
1579 return {};
1580 } else {
1581 UNREACHABLE();
1582 ResetTemporaries();
1583 return {};
1584 }
1585
1586 AddLine("MOV.U {}, {};", dest_name, Visit(src));
1587 ResetTemporaries();
1588 return {};
1589}
1590
1591std::string ARBDecompiler::Select(Operation operation) {
1592 std::string temporary = AllocTemporary();
1593 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1594 Visit(operation[2]));
1595 return temporary;
1596}
1597
1598std::string ARBDecompiler::FClamp(Operation operation) {
1599 // 1.0f in hex; replace with std::bit_cast once C++20 is available
1600 static constexpr u32 POSITIVE_ONE = 0x3f800000;
1601
1602 std::string temporary = AllocTemporary();
1603 const Node& value = operation[0];
1604 const Node& low = operation[1];
1605 const Node& high = operation[2];
1606 const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
1607 const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
1608 if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
1609 AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
1610 } else {
1611 AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
1612 AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
1613 }
1614 return temporary;
1615}
1616
1617std::string ARBDecompiler::FCastHalf0(Operation operation) {
1618 const std::string temporary = AllocVectorTemporary();
1619 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1620 return fmt::format("{}.x", temporary);
1621}
1622
1623std::string ARBDecompiler::FCastHalf1(Operation operation) {
1624 const std::string temporary = AllocVectorTemporary();
1625 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1626 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1627 return fmt::format("{}.x", temporary);
1628}
1629
1630std::string ARBDecompiler::FSqrt(Operation operation) {
1631 std::string temporary = AllocTemporary();
1632 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1633 AddLine("RCP.F32 {}, {};", temporary, temporary);
1634 return temporary;
1635}
1636
1637std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1638 const std::string temporary = AllocVectorTemporary();
1639 if (!device.HasWarpIntrinsics()) {
1640 LOG_ERROR(Render_OpenGL,
1641 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
1642 AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1643 return fmt::format("{}.x", temporary);
1644 }
1645
1646 AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
1647 AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
1648 AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
1649 AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
1650 AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
1651 AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
1652 AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
1653 return fmt::format("{}.x", temporary);
1654}
1655
1656std::string ARBDecompiler::HAdd2(Operation operation) {
1657 const std::string tmp1 = AllocVectorTemporary();
1658 const std::string tmp2 = AllocVectorTemporary();
1659 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1660 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1661 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1662 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1663 return fmt::format("{}.x", tmp1);
1664}
1665
1666std::string ARBDecompiler::HMul2(Operation operation) {
1667 const std::string tmp1 = AllocVectorTemporary();
1668 const std::string tmp2 = AllocVectorTemporary();
1669 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1670 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1671 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1672 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1673 return fmt::format("{}.x", tmp1);
1674}
1675
1676std::string ARBDecompiler::HFma2(Operation operation) {
1677 const std::string tmp1 = AllocVectorTemporary();
1678 const std::string tmp2 = AllocVectorTemporary();
1679 const std::string tmp3 = AllocVectorTemporary();
1680 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1681 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1682 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1683 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1684 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1685 return fmt::format("{}.x", tmp1);
1686}
1687
1688std::string ARBDecompiler::HAbsolute(Operation operation) {
1689 const std::string temporary = AllocVectorTemporary();
1690 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1691 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1692 return fmt::format("{}.x", temporary);
1693}
1694
1695std::string ARBDecompiler::HNegate(Operation operation) {
1696 const std::string temporary = AllocVectorTemporary();
1697 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1698 AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
1699 AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
1700 AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
1701 AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
1702 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1703 return fmt::format("{}.x", temporary);
1704}
1705
1706std::string ARBDecompiler::HClamp(Operation operation) {
1707 const std::string tmp1 = AllocVectorTemporary();
1708 const std::string tmp2 = AllocVectorTemporary();
1709 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1710 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
1711 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1712 AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
1713 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
1714 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1715 AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
1716 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1717 return fmt::format("{}.x", tmp1);
1718}
1719
1720std::string ARBDecompiler::HCastFloat(Operation operation) {
1721 const std::string temporary = AllocVectorTemporary();
1722 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1723 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1724 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1725 return fmt::format("{}.x", temporary);
1726}
1727
1728std::string ARBDecompiler::HUnpack(Operation operation) {
1729 std::string operand = Visit(operation[0]);
1730 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1731 case Tegra::Shader::HalfType::H0_H1:
1732 return operand;
1733 case Tegra::Shader::HalfType::F32: {
1734 const std::string temporary = AllocVectorTemporary();
1735 AddLine("MOV.U {}.x, {};", temporary, operand);
1736 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1737 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1738 return fmt::format("{}.x", temporary);
1739 }
1740 case Tegra::Shader::HalfType::H0_H0: {
1741 const std::string temporary = AllocVectorTemporary();
1742 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1743 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1744 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1745 return fmt::format("{}.x", temporary);
1746 }
1747 case Tegra::Shader::HalfType::H1_H1: {
1748 const std::string temporary = AllocVectorTemporary();
1749 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1750 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
1751 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1752 return fmt::format("{}.x", temporary);
1753 }
1754 }
1755 UNREACHABLE();
1756 return "{0, 0, 0, 0}.x";
1757}
1758
1759std::string ARBDecompiler::HMergeF32(Operation operation) {
1760 const std::string temporary = AllocVectorTemporary();
1761 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1762 return fmt::format("{}.x", temporary);
1763}
1764
1765std::string ARBDecompiler::HMergeH0(Operation operation) {
1766 const std::string temporary = AllocVectorTemporary();
1767 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1768 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1769 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1770 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1771 return fmt::format("{}.x", temporary);
1772}
1773
1774std::string ARBDecompiler::HMergeH1(Operation operation) {
1775 const std::string temporary = AllocVectorTemporary();
1776 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1777 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1778 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1779 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1780 return fmt::format("{}.x", temporary);
1781}
1782
1783std::string ARBDecompiler::HPack2(Operation operation) {
1784 const std::string temporary = AllocVectorTemporary();
1785 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1786 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1787 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1788 return fmt::format("{}.x", temporary);
1789}
1790
1791std::string ARBDecompiler::LogicalAssign(Operation operation) {
1792 const Node& dest = operation[0];
1793 const Node& src = operation[1];
1794
1795 std::string target;
1796
1797 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1798 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1799
1800 const Tegra::Shader::Pred index = pred->GetIndex();
1801 switch (index) {
1802 case Tegra::Shader::Pred::NeverExecute:
1803 case Tegra::Shader::Pred::UnusedIndex:
1804 // Writing to these predicates is a no-op
1805 return {};
1806 }
1807 target = fmt::format("P{}.x", static_cast<u64>(index));
1808 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1809 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1810 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1811 } else {
1812 UNREACHABLE();
1813 ResetTemporaries();
1814 return {};
1815 }
1816
1817 AddLine("MOV.U {}, {};", target, Visit(src));
1818 ResetTemporaries();
1819 return {};
1820}
1821
1822std::string ARBDecompiler::LogicalPick2(Operation operation) {
1823 std::string temporary = AllocTemporary();
1824 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1825 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1826 return temporary;
1827}
1828
1829std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1830 std::string temporary = AllocTemporary();
1831 const std::string op = Visit(operation[0]);
1832 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1833 return temporary;
1834}
1835
1836std::string ARBDecompiler::FloatOrdered(Operation operation) {
1837 std::string temporary = AllocTemporary();
1838 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1839 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1840 AddLine("MOV.S {}, -1;", temporary);
1841 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1842 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1843 return temporary;
1844}
1845
1846std::string ARBDecompiler::FloatUnordered(Operation operation) {
1847 std::string temporary = AllocTemporary();
1848 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1849 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1850 AddLine("MOV.S {}, 0;", temporary);
1851 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1852 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1853 return temporary;
1854}
1855
1856std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1857 std::string temporary = AllocTemporary();
1858 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1859 AddLine("MOV.S {}, 0;", temporary);
1860 AddLine("IF CF.x;");
1861 AddLine("MOV.S {}, -1;", temporary);
1862 AddLine("ENDIF;");
1863 return temporary;
1864}
1865
1866std::string ARBDecompiler::Texture(Operation operation) {
1867 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1868 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1869 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1870
1871 std::string_view opcode = "TEX";
1872 std::string extra;
1873 if (meta.bias) {
1874 ASSERT(!meta.lod);
1875 opcode = "TXB";
1876
1877 if (swizzle < 4) {
1878 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
1879 } else {
1880 const std::string bias = AllocTemporary();
1881 AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
1882 extra = fmt::format(" {},", bias);
1883 }
1884 }
1885 if (meta.lod) {
1886 ASSERT(!meta.bias);
1887 opcode = "TXL";
1888
1889 if (swizzle < 4) {
1890 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1891 } else {
1892 const std::string lod = AllocTemporary();
1893 AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
1894 extra = fmt::format(" {},", lod);
1895 }
1896 }
1897
1898 AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
1899 TextureType(meta), BuildAoffi(operation));
1900 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1901 return fmt::format("{}.x", temporary);
1902}
1903
1904std::string ARBDecompiler::TextureGather(Operation operation) {
1905 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1906 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1907 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1908
1909 std::string comp;
1910 if (!meta.sampler.is_shadow) {
1911 const auto& immediate = std::get<ImmediateNode>(*meta.component);
1912 comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
1913 }
1914
1915 AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
1916 TextureType(meta), BuildAoffi(operation));
1917 AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
1918 return fmt::format("{}.x", temporary);
1919}
1920
1921std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
1922 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1923 const std::string temporary = AllocVectorTemporary();
1924 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1925
1926 ASSERT(!meta.sampler.is_array);
1927
1928 const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
1929 AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
1930 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1931 return fmt::format("{}.x", temporary);
1932}
1933
1934std::string ARBDecompiler::TextureQueryLod(Operation operation) {
1935 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1936 const std::string temporary = AllocVectorTemporary();
1937 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1938
1939 ASSERT(!meta.sampler.is_array);
1940
1941 const std::size_t count = operation.GetOperandsCount();
1942 for (std::size_t i = 0; i < count; ++i) {
1943 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1944 }
1945 AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
1946 AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
1947 AddLine("TRUNC.S {}, {};", temporary, temporary);
1948 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1949 return fmt::format("{}.x", temporary);
1950}
1951
1952std::string ARBDecompiler::TexelFetch(Operation operation) {
1953 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1954 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1955 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1956
1957 if (!meta.sampler.is_buffer) {
1958 ASSERT(swizzle < 4);
1959 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1960 }
1961 AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
1962 BuildAoffi(operation));
1963 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1964 return fmt::format("{}.x", temporary);
1965}
1966
1967std::string ARBDecompiler::TextureGradient(Operation operation) {
1968 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1969 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1970 const std::string ddx = AllocVectorTemporary();
1971 const std::string ddy = AllocVectorTemporary();
1972 const std::string coord = std::get<1>(BuildCoords(operation));
1973
1974 const std::size_t num_components = meta.derivates.size() / 2;
1975 for (std::size_t index = 0; index < num_components; ++index) {
1976 const char swizzle = Swizzle(index);
1977 AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
1978 AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
1979 }
1980
1981 const std::string_view result = coord;
1982 AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
1983 TextureType(meta), BuildAoffi(operation));
1984 AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
1985 return fmt::format("{}.x", result);
1986}
1987
1988std::string ARBDecompiler::ImageLoad(Operation operation) {
1989 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1990 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1991 const std::size_t count = operation.GetOperandsCount();
1992 const std::string_view type = ImageType(meta.image.type);
1993
1994 const std::string temporary = AllocVectorTemporary();
1995 for (std::size_t i = 0; i < count; ++i) {
1996 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1997 }
1998 AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
1999 AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
2000 return fmt::format("{}.x", temporary);
2001}
2002
2003std::string ARBDecompiler::ImageStore(Operation operation) {
2004 const auto& meta = std::get<MetaImage>(operation.GetMeta());
2005 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
2006 const std::size_t num_coords = operation.GetOperandsCount();
2007 const std::size_t num_values = meta.values.size();
2008 const std::string_view type = ImageType(meta.image.type);
2009
2010 const std::string coord = AllocVectorTemporary();
2011 const std::string value = AllocVectorTemporary();
2012 for (std::size_t i = 0; i < num_coords; ++i) {
2013 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
2014 }
2015 for (std::size_t i = 0; i < num_values; ++i) {
2016 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
2017 }
2018 AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
2019 return {};
2020}
2021
2022std::string ARBDecompiler::Branch(Operation operation) {
2023 const auto target = std::get<ImmediateNode>(*operation[0]);
2024 AddLine("MOV.U PC.x, {};", target.GetValue());
2025 AddLine("CONT;");
2026 return {};
2027}
2028
2029std::string ARBDecompiler::BranchIndirect(Operation operation) {
2030 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
2031 AddLine("CONT;");
2032 return {};
2033}
2034
2035std::string ARBDecompiler::PushFlowStack(Operation operation) {
2036 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2037 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
2038 const std::string_view stack_name = StackName(stack);
2039 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
2040 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2041 return {};
2042}
2043
2044std::string ARBDecompiler::PopFlowStack(Operation operation) {
2045 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2046 const std::string_view stack_name = StackName(stack);
2047 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2048 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
2049 AddLine("CONT;");
2050 return {};
2051}
2052
2053std::string ARBDecompiler::Exit(Operation) {
2054 Exit();
2055 return {};
2056}
2057
2058std::string ARBDecompiler::Discard(Operation) {
2059 AddLine("KIL TR;");
2060 return {};
2061}
2062
2063std::string ARBDecompiler::EmitVertex(Operation) {
2064 AddLine("EMIT;");
2065 return {};
2066}
2067
2068std::string ARBDecompiler::EndPrimitive(Operation) {
2069 AddLine("ENDPRIM;");
2070 return {};
2071}
2072
2073std::string ARBDecompiler::InvocationId(Operation) {
2074 return "primitive.invocation";
2075}
2076
2077std::string ARBDecompiler::YNegate(Operation) {
2078 LOG_WARNING(Render_OpenGL, "(STUBBED)");
2079 std::string temporary = AllocTemporary();
2080 AddLine("MOV.F {}, 1;", temporary);
2081 return temporary;
2082}
2083
2084std::string ARBDecompiler::ThreadId(Operation) {
2085 return fmt::format("{}.threadid", StageInputName(stage));
2086}
2087
2088std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
2089 if (!device.HasWarpIntrinsics()) {
2090 LOG_ERROR(Render_OpenGL,
2091 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
2092 return Visit(operation[0]);
2093 }
2094 const std::string temporary = AllocVectorTemporary();
2095 AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
2096 Visit(operation[1]));
2097 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
2098 return fmt::format("{}.x", temporary);
2099}
2100
2101std::string ARBDecompiler::Barrier(Operation) {
2102 AddLine("BAR;");
2103 return {};
2104}
2105
2106std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
2107 AddLine("MEMBAR.CTA;");
2108 return {};
2109}
2110
2111std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
2112 AddLine("MEMBAR;");
2113 return {};
2114}
2115
2116} // Anonymous namespace
2117
2118std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
2119 const VideoCommon::Shader::Registry& registry,
2120 Tegra::Engines::ShaderType stage, std::string_view identifier) {
2121 return ARBDecompiler(device, ir, registry, stage, identifier).Code();
2122}
2123
2124} // namespace OpenGL
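
Note on the packed-half helpers removed above: HMergeH0 and HMergeH1 unpack both operands with UP2H, copy one half across, and repack with PK2H. A minimal C++ sketch of the same semantics follows, assuming that H0 occupies the low 16 bits of the packed word (PK2H packs the x component there); the function names and test values are illustrative only, not part of the commit.

#include <cstdint>

// Assumption: H0 is the low 16 bits of a packed half2 word, H1 the high 16 bits.
constexpr std::uint32_t MergeH0(std::uint32_t a, std::uint32_t b) {
    // Mirrors HMergeH0: keep a's H1, take b's H0.
    return (a & 0xFFFF0000u) | (b & 0x0000FFFFu);
}

constexpr std::uint32_t MergeH1(std::uint32_t a, std::uint32_t b) {
    // Mirrors HMergeH1: keep a's H0, take b's H1.
    return (b & 0xFFFF0000u) | (a & 0x0000FFFFu);
}

static_assert(MergeH0(0xAAAABBBBu, 0xCCCCDDDDu) == 0xAAAADDDDu);
static_assert(MergeH1(0xAAAABBBBu, 0xCCCCDDDDu) == 0xCCCCBBBBu);
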
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c4189fb60..07a995f7d 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <span> 6#include <span>
6 7
7#include "video_core/buffer_cache/buffer_cache.h" 8#include "video_core/buffer_cache/buffer_cache.h"
8#include "video_core/renderer_opengl/gl_buffer_cache.h" 9#include "video_core/renderer_opengl/gl_buffer_cache.h"
9#include "video_core/renderer_opengl/gl_device.h" 10#include "video_core/renderer_opengl/gl_device.h"
11#include "video_core/renderer_opengl/maxwell_to_gl.h"
10 12
11namespace OpenGL { 13namespace OpenGL {
12namespace { 14namespace {
15using VideoCore::Surface::PixelFormat;
16
13struct BindlessSSBO { 17struct BindlessSSBO {
14 GLuint64EXT address; 18 GLuint64EXT address;
15 GLsizei length; 19 GLsizei length;
@@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
21 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, 25 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
22 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, 26 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
23}; 27};
28
29[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
30 switch (gl_format) {
31 case GL_RGBA8_SNORM:
32 return GL_RGBA8;
33 case GL_R8_SNORM:
34 return GL_R8;
35 case GL_RGBA16_SNORM:
36 return GL_RGBA16;
37 case GL_R16_SNORM:
38 return GL_R16;
39 case GL_RG16_SNORM:
40 return GL_RG16;
41 case GL_RG8_SNORM:
42 return GL_RG8;
43 default:
44 return gl_format;
45 }
46}
24} // Anonymous namespace 47} // Anonymous namespace
25 48
26Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) 49Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
62 glMakeNamedBufferResidentNV(buffer.handle, access); 85 glMakeNamedBufferResidentNV(buffer.handle, access);
63} 86}
64 87
88GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
89 const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
90 return offset == view.offset && size == view.size && format == view.format;
91 })};
92 if (it != views.end()) {
93 return it->texture.handle;
94 }
95 OGLTexture texture;
96 texture.Create(GL_TEXTURE_BUFFER);
97 const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
98 const GLenum texture_format{GetTextureBufferFormat(gl_format)};
99 if (texture_format != gl_format) {
100 LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
101 }
102 glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
103 views.push_back({
104 .offset = offset,
105 .size = size,
106 .format = format,
107 .texture = std::move(texture),
108 });
109 return views.back().texture.handle;
110}
111
65BufferCacheRuntime::BufferCacheRuntime(const Device& device_) 112BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
66 : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, 113 : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
67 use_assembly_shaders{device.UseAssemblyShaders()}, 114 use_assembly_shaders{device.UseAssemblyShaders()},
@@ -100,7 +147,7 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
100 147
101void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { 148void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
102 glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), 149 glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
103                           static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RGBA, GL_UNSIGNED_INT,
150                           static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT,
104 &value); 151 &value);
105} 152}
106 153
@@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
144 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, 191 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
145 static_cast<GLsizeiptr>(size)); 192 static_cast<GLsizeiptr>(size));
146 } else { 193 } else {
147        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
194        const GLuint base_binding = graphics_base_uniform_bindings[stage];
148 const GLuint binding = base_binding + binding_index; 195 const GLuint binding = base_binding + binding_index;
149 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), 196 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
150 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 197 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
171 218
172void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, 219void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
173 u32 offset, u32 size, bool is_written) { 220 u32 offset, u32 size, bool is_written) {
174    if (use_assembly_shaders) {
221    if (use_storage_buffers) {
222 const GLuint base_binding = graphics_base_storage_bindings[stage];
223 const GLuint binding = base_binding + binding_index;
224 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
225 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
226 } else {
175 const BindlessSSBO ssbo{ 227 const BindlessSSBO ssbo{
176 .address = buffer.HostGpuAddr() + offset, 228 .address = buffer.HostGpuAddr() + offset,
177 .length = static_cast<GLsizei>(size), 229 .length = static_cast<GLsizei>(size),
@@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
180 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); 232 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
181 glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, 233 glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
182 reinterpret_cast<const GLuint*>(&ssbo)); 234 reinterpret_cast<const GLuint*>(&ssbo));
183 } else {
184 const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
185 const GLuint binding = base_binding + binding_index;
186 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
187 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
188 } 235 }
189} 236}
190 237
191void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, 238void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
192 u32 size, bool is_written) { 239 u32 size, bool is_written) {
193    if (use_assembly_shaders) {
240    if (use_storage_buffers) {
241 if (size != 0) {
242 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
243 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
244 } else {
245 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
246 }
247 } else {
194 const BindlessSSBO ssbo{ 248 const BindlessSSBO ssbo{
195 .address = buffer.HostGpuAddr() + offset, 249 .address = buffer.HostGpuAddr() + offset,
196 .length = static_cast<GLsizei>(size), 250 .length = static_cast<GLsizei>(size),
@@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
199 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); 253 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
200 glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, 254 glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
201 reinterpret_cast<const GLuint*>(&ssbo)); 255 reinterpret_cast<const GLuint*>(&ssbo));
202 } else if (size == 0) {
203 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
204 } else {
205 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
206 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
207 } 256 }
208} 257}
209 258
@@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
213 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 262 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
214} 263}
215 264
265void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
266 PixelFormat format) {
267 *texture_handles++ = buffer.View(offset, size, format);
268}
269
270void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
271 *image_handles++ = buffer.View(offset, size, format);
272}
273
216} // namespace OpenGL 274} // namespace OpenGL
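
Note: the BindTextureBuffer/BindImageBuffer overloads added above do not bind anything themselves; they append the view handle through the pointers installed with SetImagePointers(), and the pipeline that owns those arrays binds the collected range afterwards, as gl_compute_pipeline.cpp further down does with glBindTextures/glBindImageTextures. A minimal, self-contained sketch of that hand-off; the names and handle values here are illustrative only, not part of the commit.

#include <array>
#include <cstdint>

using HandleSketch = std::uint32_t; // stands in for GLuint

struct RuntimeSketch {
    HandleSketch* texture_handles = nullptr;
    HandleSketch* image_handles = nullptr;

    void SetImagePointers(HandleSketch* textures, HandleSketch* images) {
        texture_handles = textures;
        image_handles = images;
    }
    // The real runtime obtains the handle from buffer.View(offset, size, format).
    void BindTextureBuffer(HandleSketch view) { *texture_handles++ = view; }
    void BindImageBuffer(HandleSketch view) { *image_handles++ = view; }
};

int main() {
    std::array<HandleSketch, 4> textures{};
    std::array<HandleSketch, 4> images{};
    RuntimeSketch runtime;
    runtime.SetImagePointers(textures.data(), images.data());
    runtime.BindTextureBuffer(7); // lands in textures[0]
    runtime.BindImageBuffer(9);   // lands in images[0]
    // The owning pipeline would now bind the collected ranges in one call each.
    return (textures[0] == 7 && images[0] == 9) ? 0 : 1;
}
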
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index fe91aa452..060d36427 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
32 32
33 void MakeResident(GLenum access) noexcept; 33 void MakeResident(GLenum access) noexcept;
34 34
35 [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
36
35 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { 37 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
36 return address; 38 return address;
37 } 39 }
@@ -41,9 +43,17 @@ public:
41 } 43 }
42 44
43private: 45private:
46 struct BufferView {
47 u32 offset;
48 u32 size;
49 VideoCore::Surface::PixelFormat format;
50 OGLTexture texture;
51 };
52
44 GLuint64EXT address = 0; 53 GLuint64EXT address = 0;
45 OGLBuffer buffer; 54 OGLBuffer buffer;
46 GLenum current_residency_access = GL_NONE; 55 GLenum current_residency_access = GL_NONE;
56 std::vector<BufferView> views;
47}; 57};
48 58
49class BufferCacheRuntime { 59class BufferCacheRuntime {
@@ -75,17 +85,21 @@ public:
75 85
76 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); 86 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
77 87
88 void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
89 VideoCore::Surface::PixelFormat format);
90
91 void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
92 VideoCore::Surface::PixelFormat format);
93
78 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { 94 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
95 const GLuint handle = fast_uniforms[stage][binding_index].handle;
96 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
79 if (use_assembly_shaders) { 97 if (use_assembly_shaders) {
80 const GLuint handle = fast_uniforms[stage][binding_index].handle;
81 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
82 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); 98 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
83 } else { 99 } else {
84            const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
100            const GLuint base_binding = graphics_base_uniform_bindings[stage];
85 const GLuint binding = base_binding + binding_index; 101 const GLuint binding = base_binding + binding_index;
86            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
102            glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
87 fast_uniforms[stage][binding_index].handle, 0,
88 static_cast<GLsizeiptr>(size));
89 } 103 }
90 } 104 }
91 105
@@ -103,7 +117,7 @@ public:
103 117
104 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { 118 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
105 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); 119 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
106        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
120        const GLuint base_binding = graphics_base_uniform_bindings[stage];
107 const GLuint binding = base_binding + binding_index; 121 const GLuint binding = base_binding + binding_index;
108 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), 122 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
109 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 123 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -118,6 +132,27 @@ public:
118 return has_fast_buffer_sub_data; 132 return has_fast_buffer_sub_data;
119 } 133 }
120 134
135 [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
136 return !use_assembly_shaders;
137 }
138
139 void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
140 graphics_base_uniform_bindings = bindings;
141 }
142
143 void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
144 graphics_base_storage_bindings = bindings;
145 }
146
147 void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
148 texture_handles = texture_handles_;
149 image_handles = image_handles_;
150 }
151
152 void SetEnableStorageBuffers(bool use_storage_buffers_) {
153 use_storage_buffers = use_storage_buffers_;
154 }
155
121private: 156private:
122 static constexpr std::array PABO_LUT{ 157 static constexpr std::array PABO_LUT{
123 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 158 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -131,8 +166,15 @@ private:
131 bool use_assembly_shaders = false; 166 bool use_assembly_shaders = false;
132 bool has_unified_vertex_buffers = false; 167 bool has_unified_vertex_buffers = false;
133 168
169 bool use_storage_buffers = false;
170
134 u32 max_attributes = 0; 171 u32 max_attributes = 0;
135 172
173 std::array<GLuint, 5> graphics_base_uniform_bindings{};
174 std::array<GLuint, 5> graphics_base_storage_bindings{};
175 GLuint* texture_handles = nullptr;
176 GLuint* image_handles = nullptr;
177
136 std::optional<StreamBuffer> stream_buffer; 178 std::optional<StreamBuffer> stream_buffer;
137 179
138 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, 180 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -156,6 +198,7 @@ struct BufferCacheParams {
156 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; 198 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
157 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; 199 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
158 static constexpr bool USE_MEMORY_MAPS = false; 200 static constexpr bool USE_MEMORY_MAPS = false;
201 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
159}; 202};
160 203
161using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 204using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
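
Note: Buffer::View(), declared above and defined in gl_buffer_cache.cpp, keeps a per-buffer list of (offset, size, format) views and returns the cached texture handle when the same combination is requested again. A self-contained sketch of that find-or-create pattern; the integer handle counter stands in for the OGLTexture the real code creates, and the names are illustrative only.

#include <cstdint>
#include <vector>

struct ViewSketch {
    std::uint32_t offset;
    std::uint32_t size;
    int format;
    std::uint32_t handle; // stands in for the created texture buffer view
};

class BufferSketch {
public:
    std::uint32_t View(std::uint32_t offset, std::uint32_t size, int format) {
        for (const ViewSketch& view : views) {
            if (view.offset == offset && view.size == size && view.format == format) {
                return view.handle; // cache hit: reuse the existing view
            }
        }
        const std::uint32_t handle = next_handle++; // real code creates a GL_TEXTURE_BUFFER here
        views.push_back({offset, size, format, handle});
        return handle;
    }

private:
    std::vector<ViewSketch> views;
    std::uint32_t next_handle = 1;
};

int main() {
    BufferSketch buffer;
    const std::uint32_t first = buffer.View(0, 256, 7);
    const std::uint32_t second = buffer.View(0, 256, 7); // same key, same handle
    return first == second ? 0 : 1;
}
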
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
new file mode 100644
index 000000000..aa1cc592f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -0,0 +1,209 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include "common/cityhash.h"
8#include "common/settings.h" // for enum class Settings::ShaderBackend
9#include "video_core/renderer_opengl/gl_compute_pipeline.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11#include "video_core/renderer_opengl/gl_shader_util.h"
12
13namespace OpenGL {
14
15using Shader::ImageBufferDescriptor;
16using Tegra::Texture::TexturePair;
17using VideoCommon::ImageId;
18
19constexpr u32 MAX_TEXTURES = 64;
20constexpr u32 MAX_IMAGES = 16;
21
22template <typename Range>
23u32 AccumulateCount(const Range& range) {
24 u32 num{};
25 for (const auto& desc : range) {
26 num += desc.count;
27 }
28 return num;
29}
30
31size_t ComputePipelineKey::Hash() const noexcept {
32 return static_cast<size_t>(
33 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
34}
35
36bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
37 return std::memcmp(this, &rhs, sizeof *this) == 0;
38}
39
40ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
41 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
42 Tegra::Engines::KeplerCompute& kepler_compute_,
43 ProgramManager& program_manager_, const Shader::Info& info_,
44 std::string code, std::vector<u32> code_v)
45 : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
46 kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
47 switch (device.GetShaderBackend()) {
48 case Settings::ShaderBackend::GLSL:
49 source_program = CreateProgram(code, GL_COMPUTE_SHADER);
50 break;
51 case Settings::ShaderBackend::GLASM:
52 assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
53 break;
54 case Settings::ShaderBackend::SPIRV:
55 source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
56 break;
57 }
58 std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
59 uniform_buffer_sizes.begin());
60
61 num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
62 num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
63
64 const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
65 ASSERT(num_textures <= MAX_TEXTURES);
66
67 const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
68 ASSERT(num_images <= MAX_IMAGES);
69
70 const bool is_glasm{assembly_program.handle != 0};
71 const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
72 use_storage_buffers =
73 !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
74 writes_global_memory = !use_storage_buffers &&
75 std::ranges::any_of(info.storage_buffers_descriptors,
76 [](const auto& desc) { return desc.is_written; });
77}
78
79void ComputePipeline::Configure() {
80 buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
81 buffer_cache.UnbindComputeStorageBuffers();
82 size_t ssbo_index{};
83 for (const auto& desc : info.storage_buffers_descriptors) {
84 ASSERT(desc.count == 1);
85 buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
86 desc.is_written);
87 ++ssbo_index;
88 }
89 texture_cache.SynchronizeComputeDescriptors();
90
91 std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
92 boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
93 std::array<GLuint, MAX_TEXTURES> samplers;
94 std::array<GLuint, MAX_TEXTURES> textures;
95 std::array<GLuint, MAX_IMAGES> images;
96 GLsizei sampler_binding{};
97 GLsizei texture_binding{};
98 GLsizei image_binding{};
99
100 const auto& qmd{kepler_compute.launch_description};
101 const auto& cbufs{qmd.const_buffer_config};
102 const bool via_header_index{qmd.linked_tsc != 0};
103 const auto read_handle{[&](const auto& desc, u32 index) {
104 ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
105 const u32 index_offset{index << desc.size_shift};
106 const u32 offset{desc.cbuf_offset + index_offset};
107 const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
108 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
109 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
110 if (desc.has_secondary) {
111 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
112 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
113 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
114 secondary_offset};
115 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
116 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
117 return TexturePair(lhs_raw | rhs_raw, via_header_index);
118 }
119 }
120 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
121 }};
122 const auto add_image{[&](const auto& desc) {
123 for (u32 index = 0; index < desc.count; ++index) {
124 const auto handle{read_handle(desc, index)};
125 image_view_indices.push_back(handle.first);
126 }
127 }};
128 for (const auto& desc : info.texture_buffer_descriptors) {
129 for (u32 index = 0; index < desc.count; ++index) {
130 const auto handle{read_handle(desc, index)};
131 image_view_indices.push_back(handle.first);
132 samplers[sampler_binding++] = 0;
133 }
134 }
135 std::ranges::for_each(info.image_buffer_descriptors, add_image);
136 for (const auto& desc : info.texture_descriptors) {
137 for (u32 index = 0; index < desc.count; ++index) {
138 const auto handle{read_handle(desc, index)};
139 image_view_indices.push_back(handle.first);
140
141 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
142 samplers[sampler_binding++] = sampler->Handle();
143 }
144 }
145 std::ranges::for_each(info.image_descriptors, add_image);
146
147 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
148 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
149
150 if (assembly_program.handle != 0) {
151 program_manager.BindComputeAssemblyProgram(assembly_program.handle);
152 } else {
153 program_manager.BindComputeProgram(source_program.handle);
154 }
155 buffer_cache.UnbindComputeTextureBuffers();
156 size_t texbuf_index{};
157 const auto add_buffer{[&](const auto& desc) {
158 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
159 for (u32 i = 0; i < desc.count; ++i) {
160 bool is_written{false};
161 if constexpr (is_image) {
162 is_written = desc.is_written;
163 }
164 ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
165 buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
166 image_view.BufferSize(), image_view.format,
167 is_written, is_image);
168 ++texbuf_index;
169 }
170 }};
171 std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
172 std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
173
174 buffer_cache.UpdateComputeBuffers();
175
176 buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
177 buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
178 buffer_cache.BindHostComputeBuffers();
179
180 const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
181 texture_binding += num_texture_buffers;
182 image_binding += num_image_buffers;
183
184 for (const auto& desc : info.texture_descriptors) {
185 for (u32 index = 0; index < desc.count; ++index) {
186 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
187 textures[texture_binding++] = image_view.Handle(desc.type);
188 }
189 }
190 for (const auto& desc : info.image_descriptors) {
191 for (u32 index = 0; index < desc.count; ++index) {
192 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
193 if (desc.is_written) {
194 texture_cache.MarkModification(image_view.image_id);
195 }
196 images[image_binding++] = image_view.StorageView(desc.type, desc.format);
197 }
198 }
199 if (texture_binding != 0) {
200 ASSERT(texture_binding == sampler_binding);
201 glBindTextures(0, texture_binding, textures.data());
202 glBindSamplers(0, sampler_binding, samplers.data());
203 }
204 if (image_binding != 0) {
205 glBindImageTextures(0, image_binding, images.data());
206 }
207}
208
209} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
new file mode 100644
index 000000000..50c676365
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -0,0 +1,93 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9#include <utility>
10
11#include "common/common_types.h"
12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_buffer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_texture_cache.h"
16
17namespace Tegra {
18class MemoryManager;
19}
20
21namespace Tegra::Engines {
22class KeplerCompute;
23}
24
25namespace Shader {
26struct Info;
27}
28
29namespace OpenGL {
30
31class Device;
32class ProgramManager;
33
34struct ComputePipelineKey {
35 u64 unique_hash;
36 u32 shared_memory_size;
37 std::array<u32, 3> workgroup_size;
38
39 size_t Hash() const noexcept;
40
41 bool operator==(const ComputePipelineKey&) const noexcept;
42
43 bool operator!=(const ComputePipelineKey& rhs) const noexcept {
44 return !operator==(rhs);
45 }
46};
47static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
48static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
49static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
50
51class ComputePipeline {
52public:
53 explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
54 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
55 Tegra::Engines::KeplerCompute& kepler_compute_,
56 ProgramManager& program_manager_, const Shader::Info& info_,
57 std::string code, std::vector<u32> code_v);
58
59 void Configure();
60
61 [[nodiscard]] bool WritesGlobalMemory() const noexcept {
62 return writes_global_memory;
63 }
64
65private:
66 TextureCache& texture_cache;
67 BufferCache& buffer_cache;
68 Tegra::MemoryManager& gpu_memory;
69 Tegra::Engines::KeplerCompute& kepler_compute;
70 ProgramManager& program_manager;
71
72 Shader::Info info;
73 OGLProgram source_program;
74 OGLAssemblyProgram assembly_program;
75 VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
76
77 u32 num_texture_buffers{};
78 u32 num_image_buffers{};
79
80 bool use_storage_buffers{};
81 bool writes_global_memory{};
82};
83
84} // namespace OpenGL
85
86namespace std {
87template <>
88struct hash<OpenGL::ComputePipelineKey> {
89 size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
90 return k.Hash();
91 }
92};
93} // namespace std
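
Note: ComputePipelineKey above deliberately has no padding bytes, which the has_unique_object_representations assert enforces, so hashing its raw bytes and comparing with std::memcmp is well defined. A self-contained sketch of the same key pattern used with std::unordered_map; FNV-1a stands in here for Common::CityHash64 and every name is illustrative only, not part of the commit.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <unordered_map>

struct KeySketch {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size[3];

    bool operator==(const KeySketch& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
// No padding bytes, so byte-wise hashing and comparison see every bit exactly once.
static_assert(std::has_unique_object_representations_v<KeySketch>);

struct KeySketchHash {
    std::size_t operator()(const KeySketch& k) const noexcept {
        const auto* bytes = reinterpret_cast<const unsigned char*>(&k);
        std::uint64_t hash = 1469598103934665603ull; // FNV-1a offset basis
        for (std::size_t i = 0; i < sizeof k; ++i) {
            hash = (hash ^ bytes[i]) * 1099511628211ull; // FNV-1a prime
        }
        return static_cast<std::size_t>(hash);
    }
};

int main() {
    std::unordered_map<KeySketch, int, KeySketchHash> cache;
    cache.emplace(KeySketch{0x1234, 0x100, {8, 8, 1}}, 42);
    const auto it = cache.find(KeySketch{0x1234, 0x100, {8, 8, 1}});
    return (it != cache.end() && it->second == 42) ? 0 : 1;
}
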
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3b00614e7..9692b8e94 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -17,39 +17,17 @@
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18#include "common/scope_exit.h" 18#include "common/scope_exit.h"
19#include "common/settings.h" 19#include "common/settings.h"
20#include "shader_recompiler/stage.h"
20#include "video_core/renderer_opengl/gl_device.h" 21#include "video_core/renderer_opengl/gl_device.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
22 23
23namespace OpenGL { 24namespace OpenGL {
24namespace { 25namespace {
25// One uniform block is reserved for emulation purposes
26constexpr u32 ReservedUniformBlocks = 1;
27
28constexpr u32 NumStages = 5;
29
30constexpr std::array LIMIT_UBOS = { 26constexpr std::array LIMIT_UBOS = {
31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 27 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, 28 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, 29 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
34}; 30};
35constexpr std::array LIMIT_SSBOS = {
36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
39};
40constexpr std::array LIMIT_SAMPLERS = {
41 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
42 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
43 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
44 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
45 GL_MAX_TEXTURE_IMAGE_UNITS,
46 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
47};
48constexpr std::array LIMIT_IMAGES = {
49 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
50 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
51 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
52};
53 31
54template <typename T> 32template <typename T>
55T GetInteger(GLenum pname) { 33T GetInteger(GLenum pname) {
@@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
82 return std::ranges::find(extensions, extension) != extensions.end(); 60 return std::ranges::find(extensions, extension) != extensions.end();
83} 61}
84 62
85u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
86    ASSERT(num >= amount);
87    if (limit) {
88        amount = std::min(amount, GetInteger<u32>(*limit));
89    }
90    num -= amount;
91    return std::exchange(base, base + amount);
92}
93
94std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
95    std::array<u32, Tegra::Engines::MaxShaderTypes> max;
96    std::ranges::transform(LIMIT_UBOS, max.begin(),
97                           [](GLenum pname) { return GetInteger<u32>(pname); });
98    return max;
99}
63std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
64    std::array<u32, Shader::MaxStageTypes> max;
65    std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
66    return max;
67}
100 68
101std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
102 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
103
104 static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
105 const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
106 const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
107 const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
108
109 u32 num_ubos = total_ubos - ReservedUniformBlocks;
110 u32 num_ssbos = total_ssbos;
111 u32 num_samplers = total_samplers;
112
113 u32 base_ubo = ReservedUniformBlocks;
114 u32 base_ssbo = 0;
115 u32 base_samplers = 0;
116
117 for (std::size_t i = 0; i < NumStages; ++i) {
118 const std::size_t stage = stage_swizzle[i];
119 bindings[stage] = {
120 Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
121 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
122 Extract(base_samplers, num_samplers, total_samplers / NumStages,
123 LIMIT_SAMPLERS[stage])};
124 }
125
126 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
127 u32 base_images = 0;
128
129 // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
130 // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
131 // fragment stage, and at least 1 for the rest of the stages.
132 // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
133
134 // Reserve at least 4 image bindings on the fragment stage.
135 bindings[4].image =
136 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
137
138 // This is guaranteed to be at least 1.
139 const u32 total_extracted_images = num_images / (NumStages - 1);
140
141 // Reserve the other image bindings.
142 for (std::size_t i = 0; i < NumStages; ++i) {
143 const std::size_t stage = stage_swizzle[i];
144 if (stage == 4) {
145 continue;
146 }
147 bindings[stage].image =
148 Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
149 }
150
151 // Compute doesn't care about any of this.
152 bindings[5] = {0, 0, 0, 0};
153
154 return bindings;
155}
156
157bool IsASTCSupported() { 69bool IsASTCSupported() {
158    static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
159    static constexpr std::array formats = {
70    static constexpr std::array targets{
71        GL_TEXTURE_2D,
72        GL_TEXTURE_2D_ARRAY,
73    };
74    static constexpr std::array formats{
160 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 75 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
161 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 76 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
162 GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, 77 GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
@@ -172,11 +87,10 @@ bool IsASTCSupported() {
172 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 87 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
173 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 88 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
174 }; 89 };
175    static constexpr std::array required_support = {
90    static constexpr std::array required_support{
176 GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, 91 GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
177 GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, 92 GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
178 }; 93 };
179
180 for (const GLenum target : targets) { 94 for (const GLenum target : targets) {
181 for (const GLenum format : formats) { 95 for (const GLenum format : formats) {
182 for (const GLenum support : required_support) { 96 for (const GLenum support : required_support) {
@@ -223,14 +137,13 @@ Device::Device() {
223 "Beta driver 443.24 is known to have issues. There might be performance issues."); 137 "Beta driver 443.24 is known to have issues. There might be performance issues.");
224 disable_fast_buffer_sub_data = true; 138 disable_fast_buffer_sub_data = true;
225 } 139 }
226
227 max_uniform_buffers = BuildMaxUniformBuffers(); 140 max_uniform_buffers = BuildMaxUniformBuffers();
228 base_bindings = BuildBaseBindings();
229 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 141 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
230 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 142 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
231 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 143 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
232 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 144 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
233 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); 145 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
146 max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
234 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && 147 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
235 GLAD_GL_NV_shader_thread_shuffle; 148 GLAD_GL_NV_shader_thread_shuffle;
236 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 149 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -243,18 +156,30 @@ Device::Device() {
243 has_precise_bug = TestPreciseBug(); 156 has_precise_bug = TestPreciseBug();
244 has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); 157 has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
245 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; 158 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
159 has_derivative_control = GLAD_GL_ARB_derivative_control;
246 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; 160 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
247 has_debugging_tool_attached = IsDebugToolAttached(extensions); 161 has_debugging_tool_attached = IsDebugToolAttached(extensions);
248 has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); 162 has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
163 has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
164 has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
165 has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
166 has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
167 has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
168 warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
169 need_fastmath_off = is_nvidia;
249 170
250 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive 171 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
251 // uniform buffers as "push constants" 172 // uniform buffers as "push constants"
252 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; 173 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
253 174
254    use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
175    shader_backend = Settings::values.shader_backend.GetValue();
176 use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
255 GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && 177 GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
256 GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; 178 GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
257
179    if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
180 LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
181 shader_backend = Settings::ShaderBackend::GLSL;
182 }
258 // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 183 // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
259 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && 184 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
260 !(is_amd || (is_intel && !is_linux)); 185 !(is_amd || (is_intel && !is_linux));
@@ -265,11 +190,6 @@ Device::Device() {
265 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); 190 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
266 LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", 191 LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
267 has_broken_texture_view_formats); 192 has_broken_texture_view_formats);
268
269 if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
270 LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
271 }
272
273 if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { 193 if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
274 LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); 194 LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
275 } 195 }
@@ -325,22 +245,6 @@ std::string Device::GetVendorName() const {
325 return vendor_name; 245 return vendor_name;
326} 246}
327 247
328Device::Device(std::nullptr_t) {
329 max_uniform_buffers.fill(std::numeric_limits<u32>::max());
330 uniform_buffer_alignment = 4;
331 shader_storage_alignment = 4;
332 max_vertex_attributes = 16;
333 max_varyings = 15;
334 max_compute_shared_memory_size = 0x10000;
335 has_warp_intrinsics = true;
336 has_shader_ballot = true;
337 has_vertex_viewport_layer = true;
338 has_image_load_formatted = true;
339 has_texture_shadow_lod = true;
340 has_variable_aoffi = true;
341 has_depth_buffer_float = true;
342}
343
344bool Device::TestVariableAoffi() { 248bool Device::TestVariableAoffi() {
345 return TestProgram(R"(#version 430 core 249 return TestProgram(R"(#version 430 core
346// This is a unit test, please ignore me on apitrace bug reports. 250// This is a unit test, please ignore me on apitrace bug reports.
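A minimal sketch of the backend fallback added above: when GLASM is requested but the NV assembly-program extensions are missing, the device logs an error and drops back to GLSL. The Caps struct, ResolveBackend helper, and stdio logging below are stand-ins for the real GLAD globals, Settings::ShaderBackend, and LOG_ERROR.

#include <cstdio>

enum class ShaderBackend { GLSL, GLASM, SPIRV };

struct Caps {
    bool nv_gpu_program5;
    bool nv_compute_program5;
    bool nv_transform_feedback;
    bool nv_transform_feedback2;
};

ShaderBackend ResolveBackend(ShaderBackend requested, const Caps& caps) {
    // Assembly shaders need all four NV extensions; otherwise fall back to GLSL.
    const bool can_use_glasm = caps.nv_gpu_program5 && caps.nv_compute_program5 &&
                               caps.nv_transform_feedback && caps.nv_transform_feedback2;
    if (requested == ShaderBackend::GLASM && !can_use_glasm) {
        std::puts("Assembly shaders enabled but not supported, using GLSL instead");
        return ShaderBackend::GLSL;
    }
    return requested;
}

int main() {
    const Caps caps{true, true, false, false};
    return ResolveBackend(ShaderBackend::GLASM, caps) == ShaderBackend::GLSL ? 0 : 1;
}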
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2c2b13767..ee992aed4 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,34 +6,22 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/engines/shader_type.h" 9#include "shader_recompiler/stage.h"
10
11namespace Settings {
12enum class ShaderBackend : u32;
13};
10 14
11namespace OpenGL { 15namespace OpenGL {
12 16
13class Device { 17class Device {
14public: 18public:
15 struct BaseBindings {
16 u32 uniform_buffer{};
17 u32 shader_storage_buffer{};
18 u32 sampler{};
19 u32 image{};
20 };
21
22 explicit Device(); 19 explicit Device();
23 explicit Device(std::nullptr_t);
24 20
25 [[nodiscard]] std::string GetVendorName() const; 21 [[nodiscard]] std::string GetVendorName() const;
26 22
27 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { 23 u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; 24 return max_uniform_buffers[static_cast<size_t>(stage)];
29 }
30
31 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
32 return base_bindings[stage_index];
33 }
34
35 const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
36 return GetBaseBindings(static_cast<std::size_t>(shader_type));
37 } 25 }
38 26
39 size_t GetUniformBufferAlignment() const { 27 size_t GetUniformBufferAlignment() const {
@@ -56,6 +44,10 @@ public:
56 return max_compute_shared_memory_size; 44 return max_compute_shared_memory_size;
57 } 45 }
58 46
47 u32 GetMaxGLASMStorageBufferBlocks() const {
48 return max_glasm_storage_buffer_blocks;
49 }
50
59 bool HasWarpIntrinsics() const { 51 bool HasWarpIntrinsics() const {
60 return has_warp_intrinsics; 52 return has_warp_intrinsics;
61 } 53 }
@@ -108,6 +100,10 @@ public:
108 return has_nv_viewport_array2; 100 return has_nv_viewport_array2;
109 } 101 }
110 102
103 bool HasDerivativeControl() const {
104 return has_derivative_control;
105 }
106
111 bool HasDebuggingToolAttached() const { 107 bool HasDebuggingToolAttached() const {
112 return has_debugging_tool_attached; 108 return has_debugging_tool_attached;
113 } 109 }
@@ -128,18 +124,52 @@ public:
128 return has_depth_buffer_float; 124 return has_depth_buffer_float;
129 } 125 }
130 126
127 bool HasGeometryShaderPassthrough() const {
128 return has_geometry_shader_passthrough;
129 }
130
131 bool HasNvGpuShader5() const {
132 return has_nv_gpu_shader_5;
133 }
134
135 bool HasShaderInt64() const {
136 return has_shader_int64;
137 }
138
139 bool HasAmdShaderHalfFloat() const {
140 return has_amd_shader_half_float;
141 }
142
143 bool HasSparseTexture2() const {
144 return has_sparse_texture_2;
145 }
146
147 bool IsWarpSizePotentiallyLargerThanGuest() const {
148 return warp_size_potentially_larger_than_guest;
149 }
150
151 bool NeedsFastmathOff() const {
152 return need_fastmath_off;
153 }
154
155 Settings::ShaderBackend GetShaderBackend() const {
156 return shader_backend;
157 }
158
131private: 159private:
132 static bool TestVariableAoffi(); 160 static bool TestVariableAoffi();
133 static bool TestPreciseBug(); 161 static bool TestPreciseBug();
134 162
135 std::string vendor_name; 163 std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
136 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
137 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
138 size_t uniform_buffer_alignment{}; 164 size_t uniform_buffer_alignment{};
139 size_t shader_storage_alignment{}; 165 size_t shader_storage_alignment{};
140 u32 max_vertex_attributes{}; 166 u32 max_vertex_attributes{};
141 u32 max_varyings{}; 167 u32 max_varyings{};
142 u32 max_compute_shared_memory_size{}; 168 u32 max_compute_shared_memory_size{};
169 u32 max_glasm_storage_buffer_blocks{};
170
171 Settings::ShaderBackend shader_backend{};
172
143 bool has_warp_intrinsics{}; 173 bool has_warp_intrinsics{};
144 bool has_shader_ballot{}; 174 bool has_shader_ballot{};
145 bool has_vertex_viewport_layer{}; 175 bool has_vertex_viewport_layer{};
@@ -153,11 +183,21 @@ private:
153 bool has_broken_texture_view_formats{}; 183 bool has_broken_texture_view_formats{};
154 bool has_fast_buffer_sub_data{}; 184 bool has_fast_buffer_sub_data{};
155 bool has_nv_viewport_array2{}; 185 bool has_nv_viewport_array2{};
186 bool has_derivative_control{};
156 bool has_debugging_tool_attached{}; 187 bool has_debugging_tool_attached{};
157 bool use_assembly_shaders{}; 188 bool use_assembly_shaders{};
158 bool use_asynchronous_shaders{}; 189 bool use_asynchronous_shaders{};
159 bool use_driver_cache{}; 190 bool use_driver_cache{};
160 bool has_depth_buffer_float{}; 191 bool has_depth_buffer_float{};
192 bool has_geometry_shader_passthrough{};
193 bool has_nv_gpu_shader_5{};
194 bool has_shader_int64{};
195 bool has_amd_shader_half_float{};
196 bool has_sparse_texture_2{};
197 bool warp_size_potentially_larger_than_guest{};
198 bool need_fastmath_off{};
199
200 std::string vendor_name;
161}; 201};
162 202
163} // namespace OpenGL 203} // namespace OpenGL
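The header above forward-declares Settings::ShaderBackend as an opaque enum (enum class ShaderBackend : u32), so gl_device.h can store and return the value without including the full settings header. A self-contained sketch of that technique, with invented Config/Caps names:

#include <cstdint>

namespace Config {
enum class Backend : std::uint32_t; // opaque declaration: complete type, no enumerators yet
}

class Caps {
public:
    void SetBackend(Config::Backend backend) { shader_backend = backend; }
    Config::Backend GetBackend() const { return shader_backend; }

private:
    Config::Backend shader_backend{}; // storage is fine: the underlying type is known
};

// Only translation units that need the enumerators include the full definition.
namespace Config {
enum class Backend : std::uint32_t { GLSL = 0, GLASM = 1, SPIRV = 2 };
}

int main() {
    Caps caps;
    caps.SetBackend(Config::Backend::GLASM);
    return caps.GetBackend() == Config::Backend::GLASM ? 0 : 1;
}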
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
new file mode 100644
index 000000000..fac0034fb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -0,0 +1,572 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <string>
8#include <vector>
9
10#include "common/settings.h" // for enum class Settings::ShaderBackend
11#include "common/thread_worker.h"
12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
14#include "video_core/renderer_opengl/gl_shader_manager.h"
15#include "video_core/renderer_opengl/gl_shader_util.h"
16#include "video_core/renderer_opengl/gl_state_tracker.h"
17#include "video_core/shader_notify.h"
18#include "video_core/texture_cache/texture_cache.h"
19
20#if defined(_MSC_VER) && defined(NDEBUG)
21#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
22#else
23#define LAMBDA_FORCEINLINE
24#endif
25
26namespace OpenGL {
27namespace {
28using Shader::ImageBufferDescriptor;
29using Shader::ImageDescriptor;
30using Shader::TextureBufferDescriptor;
31using Shader::TextureDescriptor;
32using Tegra::Texture::TexturePair;
33using VideoCommon::ImageId;
34
35constexpr u32 MAX_TEXTURES = 64;
36constexpr u32 MAX_IMAGES = 8;
37
38template <typename Range>
39u32 AccumulateCount(const Range& range) {
40 u32 num{};
41 for (const auto& desc : range) {
42 num += desc.count;
43 }
44 return num;
45}
46
47GLenum Stage(size_t stage_index) {
48 switch (stage_index) {
49 case 0:
50 return GL_VERTEX_SHADER;
51 case 1:
52 return GL_TESS_CONTROL_SHADER;
53 case 2:
54 return GL_TESS_EVALUATION_SHADER;
55 case 3:
56 return GL_GEOMETRY_SHADER;
57 case 4:
58 return GL_FRAGMENT_SHADER;
59 }
60 UNREACHABLE_MSG("{}", stage_index);
61 return GL_NONE;
62}
63
64GLenum AssemblyStage(size_t stage_index) {
65 switch (stage_index) {
66 case 0:
67 return GL_VERTEX_PROGRAM_NV;
68 case 1:
69 return GL_TESS_CONTROL_PROGRAM_NV;
70 case 2:
71 return GL_TESS_EVALUATION_PROGRAM_NV;
72 case 3:
73 return GL_GEOMETRY_PROGRAM_NV;
74 case 4:
75 return GL_FRAGMENT_PROGRAM_NV;
76 }
77 UNREACHABLE_MSG("{}", stage_index);
78 return GL_NONE;
79}
80
81/// Translates hardware transform feedback indices
82/// @param location Hardware location
83/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
84/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
85std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
86 const u8 index = location / 4;
87 if (index >= 8 && index <= 39) {
88 return {GL_GENERIC_ATTRIB_NV, index - 8};
89 }
90 if (index >= 48 && index <= 55) {
91 return {GL_TEXTURE_COORD_NV, index - 48};
92 }
93 switch (index) {
94 case 7:
95 return {GL_POSITION, 0};
96 case 40:
97 return {GL_PRIMARY_COLOR_NV, 0};
98 case 41:
99 return {GL_SECONDARY_COLOR_NV, 0};
100 case 42:
101 return {GL_BACK_PRIMARY_COLOR_NV, 0};
102 case 43:
103 return {GL_BACK_SECONDARY_COLOR_NV, 0};
104 }
105 UNIMPLEMENTED_MSG("index={}", index);
106 return {GL_POSITION, 0};
107}
108
109template <typename Spec>
110bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
111 for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
112 if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
113 return false;
114 }
115 const auto& info{stage_infos[stage]};
116 if constexpr (!Spec::has_storage_buffers) {
117 if (!info.storage_buffers_descriptors.empty()) {
118 return false;
119 }
120 }
121 if constexpr (!Spec::has_texture_buffers) {
122 if (!info.texture_buffer_descriptors.empty()) {
123 return false;
124 }
125 }
126 if constexpr (!Spec::has_image_buffers) {
127 if (!info.image_buffer_descriptors.empty()) {
128 return false;
129 }
130 }
131 if constexpr (!Spec::has_images) {
132 if (!info.image_descriptors.empty()) {
133 return false;
134 }
135 }
136 }
137 return true;
138}
139
140using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
141
142template <typename Spec, typename... Specs>
143ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
144 if constexpr (sizeof...(Specs) > 0) {
145 if (!Passes<Spec>(stage_infos, enabled_mask)) {
146 return FindSpec<Specs...>(stage_infos, enabled_mask);
147 }
148 }
149 return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
150}
151
152struct SimpleVertexFragmentSpec {
153 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
154 static constexpr bool has_storage_buffers = false;
155 static constexpr bool has_texture_buffers = false;
156 static constexpr bool has_image_buffers = false;
157 static constexpr bool has_images = false;
158};
159
160struct SimpleVertexSpec {
161 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
162 static constexpr bool has_storage_buffers = false;
163 static constexpr bool has_texture_buffers = false;
164 static constexpr bool has_image_buffers = false;
165 static constexpr bool has_images = false;
166};
167
168struct DefaultSpec {
169 static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
170 static constexpr bool has_storage_buffers = true;
171 static constexpr bool has_texture_buffers = true;
172 static constexpr bool has_image_buffers = true;
173 static constexpr bool has_images = true;
174};
175
176ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
177 return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
178}
179} // Anonymous namespace
180
181GraphicsPipeline::GraphicsPipeline(
182 const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
183 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
184 ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
185 VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
186 std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
187 const GraphicsPipelineKey& key_)
188 : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
189 gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
190 state_tracker{state_tracker_}, key{key_} {
191 if (shader_notify) {
192 shader_notify->MarkShaderBuilding();
193 }
194 u32 num_textures{};
195 u32 num_images{};
196 u32 num_storage_buffers{};
197 for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
198 auto& info{stage_infos[stage]};
199 if (infos[stage]) {
200 info = *infos[stage];
201 enabled_stages_mask |= 1u << stage;
202 }
203 if (stage < 4) {
204 base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
205 base_storage_bindings[stage + 1] = base_storage_bindings[stage];
206
207 base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
208 base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
209 }
210 enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
211 std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
212
213 const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
214 num_texture_buffers[stage] += num_tex_buffer_bindings;
215 num_textures += num_tex_buffer_bindings;
216
217 const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
218 num_image_buffers[stage] += num_img_buffers_bindings;
219 num_images += num_img_buffers_bindings;
220
221 num_textures += AccumulateCount(info.texture_descriptors);
222 num_images += AccumulateCount(info.image_descriptors);
223 num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
224
225 writes_global_memory |= std::ranges::any_of(
226 info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
227 }
228 ASSERT(num_textures <= MAX_TEXTURES);
229 ASSERT(num_images <= MAX_IMAGES);
230
231 const bool assembly_shaders{assembly_programs[0].handle != 0};
232 use_storage_buffers =
233 !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
234 writes_global_memory &= !use_storage_buffers;
235 configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
236
237 if (key.xfb_enabled && device.UseAssemblyShaders()) {
238 GenerateTransformFeedbackState();
239 }
240 const bool in_parallel = thread_worker != nullptr;
241 const auto backend = device.GetShaderBackend();
242 auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
243 shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
244 for (size_t stage = 0; stage < 5; ++stage) {
245 switch (backend) {
246 case Settings::ShaderBackend::GLSL:
247 if (!sources[stage].empty()) {
248 source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
249 }
250 break;
251 case Settings::ShaderBackend::GLASM:
252 if (!sources[stage].empty()) {
253 assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
254 if (in_parallel) {
255 // Make sure program is built before continuing when building in parallel
256 glGetString(GL_PROGRAM_ERROR_STRING_NV);
257 }
258 }
259 break;
260 case Settings::ShaderBackend::SPIRV:
261 if (!sources_spirv[stage].empty()) {
262 source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
263 }
264 break;
265 }
266 }
267 if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
268 // Make sure programs have been built if we are building shaders in parallel
269 for (OGLProgram& program : source_programs) {
270 if (program.handle != 0) {
271 GLint status{};
272 glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
273 }
274 }
275 }
276 if (shader_notify) {
277 shader_notify->MarkShaderComplete();
278 }
279 is_built = true;
280 built_condvar.notify_one();
281 }};
282 if (thread_worker) {
283 thread_worker->QueueWork(std::move(func));
284 } else {
285 func(nullptr);
286 }
287}
288
289template <typename Spec>
290void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
291 std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
292 std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
293 std::array<GLuint, MAX_TEXTURES> samplers;
294 size_t image_view_index{};
295 GLsizei sampler_binding{};
296
297 texture_cache.SynchronizeGraphicsDescriptors();
298
299 buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
300 buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
301 buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
302 buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
303
304 const auto& regs{maxwell3d.regs};
305 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
306 const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
307 const Shader::Info& info{stage_infos[stage]};
308 buffer_cache.UnbindGraphicsStorageBuffers(stage);
309 if constexpr (Spec::has_storage_buffers) {
310 size_t ssbo_index{};
311 for (const auto& desc : info.storage_buffers_descriptors) {
312 ASSERT(desc.count == 1);
313 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
314 desc.cbuf_offset, desc.is_written);
315 ++ssbo_index;
316 }
317 }
318 const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
319 const auto read_handle{[&](const auto& desc, u32 index) {
320 ASSERT(cbufs[desc.cbuf_index].enabled);
321 const u32 index_offset{index << desc.size_shift};
322 const u32 offset{desc.cbuf_offset + index_offset};
323 const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
324 if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
325 std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
326 if (desc.has_secondary) {
327 ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
328 const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
329 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
330 second_offset};
331 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
332 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
333 const u32 raw{lhs_raw | rhs_raw};
334 return TexturePair(raw, via_header_index);
335 }
336 }
337 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
338 }};
339 const auto add_image{[&](const auto& desc) {
340 for (u32 index = 0; index < desc.count; ++index) {
341 const auto handle{read_handle(desc, index)};
342 image_view_indices[image_view_index++] = handle.first;
343 }
344 }};
345 if constexpr (Spec::has_texture_buffers) {
346 for (const auto& desc : info.texture_buffer_descriptors) {
347 for (u32 index = 0; index < desc.count; ++index) {
348 const auto handle{read_handle(desc, index)};
349 image_view_indices[image_view_index++] = handle.first;
350 samplers[sampler_binding++] = 0;
351 }
352 }
353 }
354 if constexpr (Spec::has_image_buffers) {
355 for (const auto& desc : info.image_buffer_descriptors) {
356 add_image(desc);
357 }
358 }
359 for (const auto& desc : info.texture_descriptors) {
360 for (u32 index = 0; index < desc.count; ++index) {
361 const auto handle{read_handle(desc, index)};
362 image_view_indices[image_view_index++] = handle.first;
363
364 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
365 samplers[sampler_binding++] = sampler->Handle();
366 }
367 }
368 if constexpr (Spec::has_images) {
369 for (const auto& desc : info.image_descriptors) {
370 add_image(desc);
371 }
372 }
373 }};
374 if constexpr (Spec::enabled_stages[0]) {
375 config_stage(0);
376 }
377 if constexpr (Spec::enabled_stages[1]) {
378 config_stage(1);
379 }
380 if constexpr (Spec::enabled_stages[2]) {
381 config_stage(2);
382 }
383 if constexpr (Spec::enabled_stages[3]) {
384 config_stage(3);
385 }
386 if constexpr (Spec::enabled_stages[4]) {
387 config_stage(4);
388 }
389 const std::span indices_span(image_view_indices.data(), image_view_index);
390 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
391
392 texture_cache.UpdateRenderTargets(false);
393 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
394
395 ImageId* texture_buffer_index{image_view_ids.data()};
396 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
397 size_t index{};
398 const auto add_buffer{[&](const auto& desc) {
399 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
400 for (u32 i = 0; i < desc.count; ++i) {
401 bool is_written{false};
402 if constexpr (is_image) {
403 is_written = desc.is_written;
404 }
405 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
406 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
407 image_view.BufferSize(), image_view.format,
408 is_written, is_image);
409 ++index;
410 ++texture_buffer_index;
411 }
412 }};
413 const Shader::Info& info{stage_infos[stage]};
414 buffer_cache.UnbindGraphicsTextureBuffers(stage);
415
416 if constexpr (Spec::has_texture_buffers) {
417 for (const auto& desc : info.texture_buffer_descriptors) {
418 add_buffer(desc);
419 }
420 }
421 if constexpr (Spec::has_image_buffers) {
422 for (const auto& desc : info.image_buffer_descriptors) {
423 add_buffer(desc);
424 }
425 }
426 for (const auto& desc : info.texture_descriptors) {
427 texture_buffer_index += desc.count;
428 }
429 if constexpr (Spec::has_images) {
430 for (const auto& desc : info.image_descriptors) {
431 texture_buffer_index += desc.count;
432 }
433 }
434 }};
435 if constexpr (Spec::enabled_stages[0]) {
436 bind_stage_info(0);
437 }
438 if constexpr (Spec::enabled_stages[1]) {
439 bind_stage_info(1);
440 }
441 if constexpr (Spec::enabled_stages[2]) {
442 bind_stage_info(2);
443 }
444 if constexpr (Spec::enabled_stages[3]) {
445 bind_stage_info(3);
446 }
447 if constexpr (Spec::enabled_stages[4]) {
448 bind_stage_info(4);
449 }
450 buffer_cache.UpdateGraphicsBuffers(is_indexed);
451 buffer_cache.BindHostGeometryBuffers(is_indexed);
452
453 if (!is_built.load(std::memory_order::relaxed)) {
454 WaitForBuild();
455 }
456 if (assembly_programs[0].handle != 0) {
457 program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
458 } else {
459 program_manager.BindSourcePrograms(source_programs);
460 }
461 const ImageId* views_it{image_view_ids.data()};
462 GLsizei texture_binding = 0;
463 GLsizei image_binding = 0;
464 std::array<GLuint, MAX_TEXTURES> textures;
465 std::array<GLuint, MAX_IMAGES> images;
466 const auto prepare_stage{[&](size_t stage) {
467 buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
468 buffer_cache.BindHostStageBuffers(stage);
469
470 texture_binding += num_texture_buffers[stage];
471 image_binding += num_image_buffers[stage];
472
473 views_it += num_texture_buffers[stage];
474 views_it += num_image_buffers[stage];
475
476 const auto& info{stage_infos[stage]};
477 for (const auto& desc : info.texture_descriptors) {
478 for (u32 index = 0; index < desc.count; ++index) {
479 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
480 textures[texture_binding++] = image_view.Handle(desc.type);
481 }
482 }
483 for (const auto& desc : info.image_descriptors) {
484 for (u32 index = 0; index < desc.count; ++index) {
485 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
486 if (desc.is_written) {
487 texture_cache.MarkModification(image_view.image_id);
488 }
489 images[image_binding++] = image_view.StorageView(desc.type, desc.format);
490 }
491 }
492 }};
493 if constexpr (Spec::enabled_stages[0]) {
494 prepare_stage(0);
495 }
496 if constexpr (Spec::enabled_stages[1]) {
497 prepare_stage(1);
498 }
499 if constexpr (Spec::enabled_stages[2]) {
500 prepare_stage(2);
501 }
502 if constexpr (Spec::enabled_stages[3]) {
503 prepare_stage(3);
504 }
505 if constexpr (Spec::enabled_stages[4]) {
506 prepare_stage(4);
507 }
508 if (texture_binding != 0) {
509 ASSERT(texture_binding == sampler_binding);
510 glBindTextures(0, texture_binding, textures.data());
511 glBindSamplers(0, sampler_binding, samplers.data());
512 }
513 if (image_binding != 0) {
514 glBindImageTextures(0, image_binding, images.data());
515 }
516}
517
518void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
519 glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
520 xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
521}
522
523void GraphicsPipeline::GenerateTransformFeedbackState() {
524 // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
525 // when this is required.
526 GLint* cursor{xfb_attribs.data()};
527 GLint* current_stream{xfb_streams.data()};
528
529 for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
530 const auto& layout = key.xfb_state.layouts[feedback];
531 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
532 if (layout.varying_count == 0) {
533 continue;
534 }
535 *current_stream = static_cast<GLint>(feedback);
536 if (current_stream != xfb_streams.data()) {
537 // When stepping one stream, push the expected token
538 cursor[0] = GL_NEXT_BUFFER_NV;
539 cursor[1] = 0;
540 cursor[2] = 0;
541 cursor += XFB_ENTRY_STRIDE;
542 }
543 ++current_stream;
544
545 const auto& locations = key.xfb_state.varyings[feedback];
546 std::optional<u8> current_index;
547 for (u32 offset = 0; offset < layout.varying_count; ++offset) {
548 const u8 location = locations[offset];
549 const u8 index = location / 4;
550
551 if (current_index == index) {
552 // Increase number of components of the previous attachment
553 ++cursor[-2];
554 continue;
555 }
556 current_index = index;
557
558 std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
559 cursor[1] = 1;
560 cursor += XFB_ENTRY_STRIDE;
561 }
562 }
563 num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
564 num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
565}
566
567void GraphicsPipeline::WaitForBuild() {
568 std::unique_lock lock{built_mutex};
569 built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
570}
571
572} // namespace OpenGL
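The FindSpec/Passes helpers above pick, at pipeline-build time, the most restrictive Spec whose static limits the pipeline satisfies, so ConfigureImpl<Spec> can drop unused descriptor loops at compile time. A condensed, runnable sketch of that dispatch (names invented, only the has_images axis kept):

#include <array>
#include <cstddef>
#include <cstdio>

struct PipelineInfo {
    std::array<bool, 5> stage_enabled;
    bool uses_images;
};

// A spec fails if it disables a stage the pipeline uses, or forbids a feature in use.
template <typename Spec>
bool Passes(const PipelineInfo& info) {
    for (std::size_t stage = 0; stage < 5; ++stage) {
        if (!Spec::enabled_stages[stage] && info.stage_enabled[stage]) {
            return false;
        }
    }
    if constexpr (!Spec::has_images) {
        if (info.uses_images) {
            return false;
        }
    }
    return true;
}

using ConfigureFn = void (*)(const PipelineInfo&);

template <typename Spec>
void Configure(const PipelineInfo&) {
    std::puts(Spec::name);
}

// Try each spec in order; the last one acts as the unconditional fallback.
template <typename Spec, typename... Rest>
ConfigureFn FindSpec(const PipelineInfo& info) {
    if constexpr (sizeof...(Rest) > 0) {
        if (!Passes<Spec>(info)) {
            return FindSpec<Rest...>(info);
        }
    }
    return &Configure<Spec>;
}

struct VertexFragmentSpec {
    static constexpr const char* name = "vertex+fragment";
    static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
    static constexpr bool has_images = false;
};

struct DefaultSpec {
    static constexpr const char* name = "default";
    static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
    static constexpr bool has_images = true;
};

int main() {
    const PipelineInfo info{{true, false, false, false, true}, false};
    FindSpec<VertexFragmentSpec, DefaultSpec>(info)(info); // prints "vertex+fragment"
}

The pay-off is that the common vertex-plus-fragment case never iterates empty storage, texture, or image descriptor lists at draw time; those loops are removed from the specialized instantiation entirely.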
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
new file mode 100644
index 000000000..4e28d9a42
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <type_traits>
10#include <utility>
11
12#include "common/bit_field.h"
13#include "common/cityhash.h"
14#include "common/common_types.h"
15#include "shader_recompiler/shader_info.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h"
18#include "video_core/renderer_opengl/gl_buffer_cache.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_texture_cache.h"
21#include "video_core/transform_feedback.h"
22
23namespace OpenGL {
24
25namespace ShaderContext {
26struct Context;
27}
28
29class Device;
30class ProgramManager;
31
32using Maxwell = Tegra::Engines::Maxwell3D::Regs;
33using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
34
35struct GraphicsPipelineKey {
36 std::array<u64, 6> unique_hashes;
37 union {
38 u32 raw;
39 BitField<0, 1, u32> xfb_enabled;
40 BitField<1, 1, u32> early_z;
41 BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
42 BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
43 BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
44 BitField<10, 1, u32> tessellation_clockwise;
45 };
46 std::array<u32, 3> padding;
47 VideoCommon::TransformFeedbackState xfb_state;
48
49 size_t Hash() const noexcept {
50 return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
51 }
52
53 bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
54 return std::memcmp(this, &rhs, Size()) == 0;
55 }
56
57 bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
58 return !operator==(rhs);
59 }
60
61 [[nodiscard]] size_t Size() const noexcept {
62 if (xfb_enabled != 0) {
63 return sizeof(GraphicsPipelineKey);
64 } else {
65 return offsetof(GraphicsPipelineKey, padding);
66 }
67 }
68};
69static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
70static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
71static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
72
73class GraphicsPipeline {
74public:
75 explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
76 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
77 Tegra::Engines::Maxwell3D& maxwell3d_,
78 ProgramManager& program_manager_, StateTracker& state_tracker_,
79 ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
80 std::array<std::string, 5> sources,
81 std::array<std::vector<u32>, 5> sources_spirv,
82 const std::array<const Shader::Info*, 5>& infos,
83 const GraphicsPipelineKey& key_);
84
85 void Configure(bool is_indexed) {
86 configure_func(this, is_indexed);
87 }
88
89 void ConfigureTransformFeedback() const {
90 if (num_xfb_attribs != 0) {
91 ConfigureTransformFeedbackImpl();
92 }
93 }
94
95 [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
96 return key;
97 }
98
99 [[nodiscard]] bool WritesGlobalMemory() const noexcept {
100 return writes_global_memory;
101 }
102
103 [[nodiscard]] bool IsBuilt() const noexcept {
104 return is_built.load(std::memory_order::relaxed);
105 }
106
107 template <typename Spec>
108 static auto MakeConfigureSpecFunc() {
109 return [](GraphicsPipeline* pipeline, bool is_indexed) {
110 pipeline->ConfigureImpl<Spec>(is_indexed);
111 };
112 }
113
114private:
115 template <typename Spec>
116 void ConfigureImpl(bool is_indexed);
117
118 void ConfigureTransformFeedbackImpl() const;
119
120 void GenerateTransformFeedbackState();
121
122 void WaitForBuild();
123
124 TextureCache& texture_cache;
125 BufferCache& buffer_cache;
126 Tegra::MemoryManager& gpu_memory;
127 Tegra::Engines::Maxwell3D& maxwell3d;
128 ProgramManager& program_manager;
129 StateTracker& state_tracker;
130 const GraphicsPipelineKey key;
131
132 void (*configure_func)(GraphicsPipeline*, bool){};
133
134 std::array<OGLProgram, 5> source_programs;
135 std::array<OGLAssemblyProgram, 5> assembly_programs;
136 u32 enabled_stages_mask{};
137
138 std::array<Shader::Info, 5> stage_infos{};
139 std::array<u32, 5> enabled_uniform_buffer_masks{};
140 VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
141 std::array<u32, 5> base_uniform_bindings{};
142 std::array<u32, 5> base_storage_bindings{};
143 std::array<u32, 5> num_texture_buffers{};
144 std::array<u32, 5> num_image_buffers{};
145
146 bool use_storage_buffers{};
147 bool writes_global_memory{};
148
149 static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
150 GLsizei num_xfb_attribs{};
151 GLsizei num_xfb_strides{};
152 std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
153 std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
154
155 std::mutex built_mutex;
156 std::condition_variable built_condvar;
157 std::atomic_bool is_built{false};
158};
159
160} // namespace OpenGL
161
162namespace std {
163template <>
164struct hash<OpenGL::GraphicsPipelineKey> {
165 size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
166 return k.Hash();
167 }
168};
169} // namespace std
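GraphicsPipelineKey above hashes and compares itself as raw bytes, but Size() stops at the padding member when transform feedback is disabled, so the large xfb_state tail never pollutes the hash. A stand-alone sketch of that variable-length key idea; std::hash over the raw bytes stands in for CityHash64 and the field names are invented:

#include <cstddef>
#include <cstring>
#include <functional>
#include <string_view>
#include <type_traits>

struct Key {
    unsigned long long unique_hash;
    unsigned int flags;      // bit 0: tail_enabled
    unsigned int padding[3]; // marks where the fixed-size prefix ends
    unsigned int tail[16];   // only meaningful when tail_enabled is set

    std::size_t Size() const noexcept {
        return (flags & 1u) != 0 ? sizeof(Key) : offsetof(Key, padding);
    }

    std::size_t Hash() const noexcept {
        return std::hash<std::string_view>{}(
            std::string_view(reinterpret_cast<const char*>(this), Size()));
    }

    bool operator==(const Key& rhs) const noexcept {
        return Size() == rhs.Size() && std::memcmp(this, &rhs, Size()) == 0;
    }
};
static_assert(std::is_trivially_copyable_v<Key>);

int main() {
    Key a{1, 0, {}, {}};
    Key b = a;
    b.tail[0] = 99; // ignored: the tail is not covered by Size() here
    return (a == b && a.Hash() == b.Hash()) ? 0 : 1;
}

The static_asserts in the real header (unique object representations, trivial copyability) are what make the memcmp/CityHash64 treatment of the struct safe.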
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..41d2b73f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -23,7 +23,6 @@
23#include "core/memory.h" 23#include "core/memory.h"
24#include "video_core/engines/kepler_compute.h" 24#include "video_core/engines/kepler_compute.h"
25#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
26#include "video_core/engines/shader_type.h"
27#include "video_core/memory_manager.h" 26#include "video_core/memory_manager.h"
28#include "video_core/renderer_opengl/gl_device.h" 27#include "video_core/renderer_opengl/gl_device.h"
29#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
@@ -40,7 +39,6 @@ namespace OpenGL {
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
41using GLvec4 = std::array<GLfloat, 4>; 40using GLvec4 = std::array<GLfloat, 4>;
42 41
43using Tegra::Engines::ShaderType;
44using VideoCore::Surface::PixelFormat; 42using VideoCore::Surface::PixelFormat;
45using VideoCore::Surface::SurfaceTarget; 43using VideoCore::Surface::SurfaceTarget;
46using VideoCore::Surface::SurfaceType; 44using VideoCore::Surface::SurfaceType;
@@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
51MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); 49MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
52 50
53namespace { 51namespace {
54
55constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 52constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
56 53
57struct TextureHandle {
58 constexpr TextureHandle(u32 data, bool via_header_index) {
59 const Tegra::Texture::TextureHandle handle{data};
60 image = handle.tic_id;
61 sampler = via_header_index ? image : handle.tsc_id.Value();
62 }
63
64 u32 image;
65 u32 sampler;
66};
67
68template <typename Engine, typename Entry>
69TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
70 ShaderType shader_type, size_t index = 0) {
71 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
72 if (entry.is_separated) {
73 const u32 buffer_1 = entry.buffer;
74 const u32 buffer_2 = entry.secondary_buffer;
75 const u32 offset_1 = entry.offset;
76 const u32 offset_2 = entry.secondary_offset;
77 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
78 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
79 return TextureHandle(handle_1 | handle_2, via_header_index);
80 }
81 }
82 if (entry.is_bindless) {
83 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
84 return TextureHandle(raw, via_header_index);
85 }
86 const u32 buffer = engine.GetBoundBuffer();
87 const u64 offset = (entry.offset + index) * sizeof(u32);
88 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
89}
90
91/// Translates hardware transform feedback indices
92/// @param location Hardware location
93/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
94/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
95std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
96 const u8 index = location / 4;
97 if (index >= 8 && index <= 39) {
98 return {GL_GENERIC_ATTRIB_NV, index - 8};
99 }
100 if (index >= 48 && index <= 55) {
101 return {GL_TEXTURE_COORD_NV, index - 48};
102 }
103 switch (index) {
104 case 7:
105 return {GL_POSITION, 0};
106 case 40:
107 return {GL_PRIMARY_COLOR_NV, 0};
108 case 41:
109 return {GL_SECONDARY_COLOR_NV, 0};
110 case 42:
111 return {GL_BACK_PRIMARY_COLOR_NV, 0};
112 case 43:
113 return {GL_BACK_SECONDARY_COLOR_NV, 0};
114 }
115 UNIMPLEMENTED_MSG("index={}", index);
116 return {GL_POSITION, 0};
117}
118
119void oglEnable(GLenum cap, bool state) { 54void oglEnable(GLenum cap, bool state) {
120 (state ? glEnable : glDisable)(cap); 55 (state ? glEnable : glDisable)(cap);
121} 56}
122
123ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
124 if (entry.is_buffer) {
125 return ImageViewType::Buffer;
126 }
127 switch (entry.type) {
128 case Tegra::Shader::TextureType::Texture1D:
129 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
130 case Tegra::Shader::TextureType::Texture2D:
131 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
132 case Tegra::Shader::TextureType::Texture3D:
133 return ImageViewType::e3D;
134 case Tegra::Shader::TextureType::TextureCube:
135 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
136 }
137 UNREACHABLE();
138 return ImageViewType::e2D;
139}
140
141ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
142 switch (entry.type) {
143 case Tegra::Shader::ImageType::Texture1D:
144 return ImageViewType::e1D;
145 case Tegra::Shader::ImageType::Texture1DArray:
146 return ImageViewType::e1DArray;
147 case Tegra::Shader::ImageType::Texture2D:
148 return ImageViewType::e2D;
149 case Tegra::Shader::ImageType::Texture2DArray:
150 return ImageViewType::e2DArray;
151 case Tegra::Shader::ImageType::Texture3D:
152 return ImageViewType::e3D;
153 case Tegra::Shader::ImageType::TextureBuffer:
154 return ImageViewType::Buffer;
155 }
156 UNREACHABLE();
157 return ImageViewType::e2D;
158}
159
160} // Anonymous namespace 57} // Anonymous namespace
161 58
162RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 59RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
170 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 67 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
171 buffer_cache_runtime(device), 68 buffer_cache_runtime(device),
172 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 69 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
173 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 70 shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
71 buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
174 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), 72 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
175 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 73 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
176 async_shaders(emu_window_) {
177 if (device.UseAsynchronousShaders()) {
178 async_shaders.AllocateWorkers();
179 }
180}
181 74
182RasterizerOpenGL::~RasterizerOpenGL() = default; 75RasterizerOpenGL::~RasterizerOpenGL() = default;
183 76
@@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
204 const auto gl_index = static_cast<GLuint>(index); 97 const auto gl_index = static_cast<GLuint>(index);
205 98
206 // Disable constant attributes. 99 // Disable constant attributes.
207 if (attrib.IsConstant()) { 100 if (attrib.constant) {
208 glDisableVertexAttribArray(gl_index); 101 glDisableVertexAttribArray(gl_index);
209 continue; 102 continue;
210 } 103 }
@@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
244 } 137 }
245} 138}
246 139
247void RasterizerOpenGL::SetupShaders(bool is_indexed) {
248 u32 clip_distances = 0;
249
250 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
251 image_view_indices.clear();
252 sampler_handles.clear();
253
254 texture_cache.SynchronizeGraphicsDescriptors();
255
256 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
257 const auto& shader_config = maxwell3d.regs.shader_config[index];
258 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
259
260 // Skip stages that are not enabled
261 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
262 switch (program) {
263 case Maxwell::ShaderProgram::Geometry:
264 program_manager.UseGeometryShader(0);
265 break;
266 case Maxwell::ShaderProgram::Fragment:
267 program_manager.UseFragmentShader(0);
268 break;
269 default:
270 break;
271 }
272 continue;
273 }
274 // Currently these stages are not supported in the OpenGL backend.
275 // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
276 if (program == Maxwell::ShaderProgram::TesselationControl ||
277 program == Maxwell::ShaderProgram::TesselationEval) {
278 continue;
279 }
280
281 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
282 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
283 switch (program) {
284 case Maxwell::ShaderProgram::VertexA:
285 case Maxwell::ShaderProgram::VertexB:
286 program_manager.UseVertexShader(program_handle);
287 break;
288 case Maxwell::ShaderProgram::Geometry:
289 program_manager.UseGeometryShader(program_handle);
290 break;
291 case Maxwell::ShaderProgram::Fragment:
292 program_manager.UseFragmentShader(program_handle);
293 break;
294 default:
295 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
296 shader_config.enable.Value(), shader_config.offset);
297 break;
298 }
299
300 // Stage indices are 0 - 5
301 const size_t stage = index == 0 ? 0 : index - 1;
302 shaders[stage] = shader;
303
304 SetupDrawTextures(shader, stage);
305 SetupDrawImages(shader, stage);
306
307 buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
308
309 buffer_cache.UnbindGraphicsStorageBuffers(stage);
310 u32 ssbo_index = 0;
311 for (const auto& buffer : shader->GetEntries().global_memory_entries) {
312 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
313 buffer.cbuf_offset, buffer.is_written);
314 ++ssbo_index;
315 }
316
317 // Workaround for Intel drivers.
318 // When a clip distance is enabled but not set in the shader it crops parts of the screen
319 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
320 // clip distances only when they're written by a shader stage.
321 clip_distances |= shader->GetEntries().clip_distances;
322
323 // When VertexA is enabled, we have dual vertex shaders
324 if (program == Maxwell::ShaderProgram::VertexA) {
325 // VertexB was combined with VertexA, so we skip the VertexB iteration
326 ++index;
327 }
328 }
329 SyncClipEnabled(clip_distances);
330 maxwell3d.dirty.flags[Dirty::Shaders] = false;
331
332 buffer_cache.UpdateGraphicsBuffers(is_indexed);
333
334 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
335 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
336
337 buffer_cache.BindHostGeometryBuffers(is_indexed);
338
339 size_t image_view_index = 0;
340 size_t texture_index = 0;
341 size_t image_index = 0;
342 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
343 const Shader* const shader = shaders[stage];
344 if (!shader) {
345 continue;
346 }
347 buffer_cache.BindHostStageBuffers(stage);
348 const auto& base = device.GetBaseBindings(stage);
349 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
350 texture_index, image_index);
351 }
352}
353
354void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 140void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
355 const VideoCore::DiskResourceLoadCallback& callback) { 141 const VideoCore::DiskResourceLoadCallback& callback) {
356 shader_cache.LoadDiskCache(title_id, stop_loading, callback); 142 shader_cache.LoadDiskResources(title_id, stop_loading, callback);
357} 143}
358 144
359void RasterizerOpenGL::Clear() { 145void RasterizerOpenGL::Clear() {
@@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
432 218
433 SyncState(); 219 SyncState();
434 220
435 // Setup shaders and their used resources. 221 GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
222 if (!pipeline) {
223 return;
224 }
436 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 225 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
437 SetupShaders(is_indexed); 226 pipeline->Configure(is_indexed);
438
439 texture_cache.UpdateRenderTargets(false);
440 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
441 program_manager.BindGraphicsPipeline();
442 227
443 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 228 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
444 BeginTransformFeedback(primitive_mode); 229 BeginTransformFeedback(pipeline, primitive_mode);
445 230
446 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); 231 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
447 const GLsizei num_instances = 232 const GLsizei num_instances =
@@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
480 num_instances, base_instance); 265 num_instances, base_instance);
481 } 266 }
482 } 267 }
483
484 EndTransformFeedback(); 268 EndTransformFeedback();
485 269
486 ++num_queued_commands; 270 ++num_queued_commands;
271 has_written_global_memory |= pipeline->WritesGlobalMemory();
487 272
488 gpu.TickWork(); 273 gpu.TickWork();
489} 274}
490 275
491void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 276void RasterizerOpenGL::DispatchCompute() {
492 Shader* const kernel = shader_cache.GetComputeKernel(code_addr); 277 ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
493 278 if (!pipeline) {
494 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 279 return;
495 BindComputeTextures(kernel); 280 }
496 281 pipeline->Configure();
497 const auto& entries = kernel->GetEntries(); 282 const auto& qmd{kepler_compute.launch_description};
498 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); 283 glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
499 buffer_cache.UnbindComputeStorageBuffers();
500 u32 ssbo_index = 0;
501 for (const auto& buffer : entries.global_memory_entries) {
502 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
503 buffer.is_written);
504 ++ssbo_index;
505 }
506 buffer_cache.UpdateComputeBuffers();
507 buffer_cache.BindHostComputeBuffers();
508
509 const auto& launch_desc = kepler_compute.launch_description;
510 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
511 ++num_queued_commands; 284 ++num_queued_commands;
285 has_written_global_memory |= pipeline->WritesGlobalMemory();
512} 286}
513 287
514void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 288void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() {
661} 435}
662 436
663void RasterizerOpenGL::FragmentBarrier() { 437void RasterizerOpenGL::FragmentBarrier() {
664 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); 438 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
665} 439}
666 440
667void RasterizerOpenGL::TiledCacheBarrier() { 441void RasterizerOpenGL::TiledCacheBarrier() {
@@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
674 return; 448 return;
675 } 449 }
676 num_queued_commands = 0; 450 num_queued_commands = 0;
451
452 // Make sure memory stored from the previous GL command stream is visible
453 // This is only needed on assembly shaders where we write to GPU memory with raw pointers
454 if (has_written_global_memory) {
455 has_written_global_memory = false;
456 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
457 }
677 glFlush(); 458 glFlush();
678} 459}
679 460
@@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
721 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); 502 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
722 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); 503 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
723 504
724 screen_info.display_texture = image_view->Handle(ImageViewType::e2D); 505 screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
725 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); 506 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
726 return true; 507 return true;
727} 508}
728 509
729void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
730 image_view_indices.clear();
731 sampler_handles.clear();
732
733 texture_cache.SynchronizeComputeDescriptors();
734
735 SetupComputeTextures(kernel);
736 SetupComputeImages(kernel);
737
738 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
739 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
740
741 program_manager.BindCompute(kernel->GetHandle());
742 size_t image_view_index = 0;
743 size_t texture_index = 0;
744 size_t image_index = 0;
745 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
746}
747
748void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
749 GLuint base_image, size_t& image_view_index,
750 size_t& texture_index, size_t& image_index) {
751 const GLuint* const samplers = sampler_handles.data() + texture_index;
752 const GLuint* const textures = texture_handles.data() + texture_index;
753 const GLuint* const images = image_handles.data() + image_index;
754
755 const size_t num_samplers = entries.samplers.size();
756 for (const auto& sampler : entries.samplers) {
757 for (size_t i = 0; i < sampler.size; ++i) {
758 const ImageViewId image_view_id = image_view_ids[image_view_index++];
759 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
760 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
761 texture_handles[texture_index++] = handle;
762 }
763 }
764 const size_t num_images = entries.images.size();
765 for (size_t unit = 0; unit < num_images; ++unit) {
766 // TODO: Mark as modified
767 const ImageViewId image_view_id = image_view_ids[image_view_index++];
768 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
769 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
770 image_handles[image_index] = handle;
771 ++image_index;
772 }
773 if (num_samplers > 0) {
774 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
775 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
776 }
777 if (num_images > 0) {
778 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
779 }
780}
781
782void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
783 const bool via_header_index =
784 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
785 for (const auto& entry : shader->GetEntries().samplers) {
786 const auto shader_type = static_cast<ShaderType>(stage_index);
787 for (size_t index = 0; index < entry.size; ++index) {
788 const auto handle =
789 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
790 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
791 sampler_handles.push_back(sampler->Handle());
792 image_view_indices.push_back(handle.image);
793 }
794 }
795}
796
797void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
798 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
799 for (const auto& entry : kernel->GetEntries().samplers) {
800 for (size_t i = 0; i < entry.size; ++i) {
801 const auto handle =
802 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
803 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
804 sampler_handles.push_back(sampler->Handle());
805 image_view_indices.push_back(handle.image);
806 }
807 }
808}
809
810void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
811 const bool via_header_index =
812 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
813 for (const auto& entry : shader->GetEntries().images) {
814 const auto shader_type = static_cast<ShaderType>(stage_index);
815 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
816 image_view_indices.push_back(handle.image);
817 }
818}
819
820void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
821 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
822 for (const auto& entry : shader->GetEntries().images) {
823 const auto handle =
824 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
825 image_view_indices.push_back(handle.image);
826 }
827}
828
829void RasterizerOpenGL::SyncState() { 510void RasterizerOpenGL::SyncState() {
830 SyncViewport(); 511 SyncViewport();
831 SyncRasterizeEnable(); 512 SyncRasterizeEnable();
@@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
941 622
942void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { 623void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
943 auto& flags = maxwell3d.dirty.flags; 624 auto& flags = maxwell3d.dirty.flags;
944 if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { 625 if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
945 return; 626 return;
946 } 627 }
947 flags[Dirty::ClipDistances] = false; 628 flags[Dirty::ClipDistances] = false;
@@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
1318 oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); 999 oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
1319} 1000}
1320 1001
1321void RasterizerOpenGL::SyncTransformFeedback() { 1002void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
1322 // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
1323 // when this is required.
1324 const auto& regs = maxwell3d.regs;
1325
1326 static constexpr std::size_t STRIDE = 3;
1327 std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
1328 std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
1329
1330 GLint* cursor = attribs.data();
1331 GLint* current_stream = streams.data();
1332
1333 for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
1334 const auto& layout = regs.tfb_layouts[feedback];
1335 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
1336 if (layout.varying_count == 0) {
1337 continue;
1338 }
1339
1340 *current_stream = static_cast<GLint>(feedback);
1341 if (current_stream != streams.data()) {
1342 // When stepping one stream, push the expected token
1343 cursor[0] = GL_NEXT_BUFFER_NV;
1344 cursor[1] = 0;
1345 cursor[2] = 0;
1346 cursor += STRIDE;
1347 }
1348 ++current_stream;
1349
1350 const auto& locations = regs.tfb_varying_locs[feedback];
1351 std::optional<u8> current_index;
1352 for (u32 offset = 0; offset < layout.varying_count; ++offset) {
1353 const u8 location = locations[offset];
1354 const u8 index = location / 4;
1355
1356 if (current_index == index) {
1357 // Increase number of components of the previous attachment
1358 ++cursor[-2];
1359 continue;
1360 }
1361 current_index = index;
1362
1363 std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
1364 cursor[1] = 1;
1365 cursor += STRIDE;
1366 }
1367 }
1368
1369 const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
1370 const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
1371 glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
1372 GL_INTERLEAVED_ATTRIBS);
1373}
1374
1375void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1376 const auto& regs = maxwell3d.regs; 1003 const auto& regs = maxwell3d.regs;
1377 if (regs.tfb_enabled == 0) { 1004 if (regs.tfb_enabled == 0) {
1378 return; 1005 return;
1379 } 1006 }
1380 if (device.UseAssemblyShaders()) { 1007 program->ConfigureTransformFeedback();
1381 SyncTransformFeedback(); 1008
1382 }
1383 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 1009 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1384 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 1010 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1385 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 1011 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1393} 1019}
1394 1020
1395void RasterizerOpenGL::EndTransformFeedback() { 1021void RasterizerOpenGL::EndTransformFeedback() {
1396 const auto& regs = maxwell3d.regs; 1022 if (maxwell3d.regs.tfb_enabled != 0) {
1397 if (regs.tfb_enabled == 0) { 1023 glEndTransformFeedback();
1398 return;
1399 } 1024 }
1400 glEndTransformFeedback();
1401} 1025}
1402 1026
1403AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} 1027AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d30ad698f..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,11 +28,9 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_resource_manager.h" 29#include "video_core/renderer_opengl/gl_resource_manager.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 33#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/shader/async_shaders.h"
36#include "video_core/textures/texture.h" 34#include "video_core/textures/texture.h"
37 35
38namespace Core::Memory { 36namespace Core::Memory {
@@ -81,7 +79,7 @@ public:
81 79
82 void Draw(bool is_indexed, bool is_instanced) override; 80 void Draw(bool is_indexed, bool is_instanced) override;
83 void Clear() override; 81 void Clear() override;
84 void DispatchCompute(GPUVAddr code_addr) override; 82 void DispatchCompute() override;
85 void ResetCounter(VideoCore::QueryType type) override; 83 void ResetCounter(VideoCore::QueryType type) override;
86 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 84 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
87 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 85 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -118,36 +116,11 @@ public:
118 return num_queued_commands > 0; 116 return num_queued_commands > 0;
119 } 117 }
120 118
121 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
122 return async_shaders;
123 }
124
125 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
126 return async_shaders;
127 }
128
129private: 119private:
130 static constexpr size_t MAX_TEXTURES = 192; 120 static constexpr size_t MAX_TEXTURES = 192;
131 static constexpr size_t MAX_IMAGES = 48; 121 static constexpr size_t MAX_IMAGES = 48;
132 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; 122 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
133 123
134 void BindComputeTextures(Shader* kernel);
135
136 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
137 size_t& image_view_index, size_t& texture_index, size_t& image_index);
138
139 /// Configures the current textures to use for the draw command.
140 void SetupDrawTextures(const Shader* shader, size_t stage_index);
141
142 /// Configures the textures used in a compute shader.
143 void SetupComputeTextures(const Shader* kernel);
144
145 /// Configures images in a graphics shader.
146 void SetupDrawImages(const Shader* shader, size_t stage_index);
147
148 /// Configures images in a compute shader.
149 void SetupComputeImages(const Shader* shader);
150
151 /// Syncs state to match guest's 124 /// Syncs state to match guest's
152 void SyncState(); 125 void SyncState();
153 126
@@ -220,18 +193,12 @@ private:
220 /// Syncs vertex instances to match the guest state 193 /// Syncs vertex instances to match the guest state
221 void SyncVertexInstances(); 194 void SyncVertexInstances();
222 195
223 /// Syncs transform feedback state to match guest state
224 /// @note Only valid on assembly shaders
225 void SyncTransformFeedback();
226
227 /// Begin a transform feedback 196 /// Begin a transform feedback
228 void BeginTransformFeedback(GLenum primitive_mode); 197 void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
229 198
230 /// End a transform feedback 199 /// End a transform feedback
231 void EndTransformFeedback(); 200 void EndTransformFeedback();
232 201
233 void SetupShaders(bool is_indexed);
234
235 Tegra::GPU& gpu; 202 Tegra::GPU& gpu;
236 Tegra::Engines::Maxwell3D& maxwell3d; 203 Tegra::Engines::Maxwell3D& maxwell3d;
237 Tegra::Engines::KeplerCompute& kepler_compute; 204 Tegra::Engines::KeplerCompute& kepler_compute;
@@ -246,13 +213,11 @@ private:
246 TextureCache texture_cache; 213 TextureCache texture_cache;
247 BufferCacheRuntime buffer_cache_runtime; 214 BufferCacheRuntime buffer_cache_runtime;
248 BufferCache buffer_cache; 215 BufferCache buffer_cache;
249 ShaderCacheOpenGL shader_cache; 216 ShaderCache shader_cache;
250 QueryCache query_cache; 217 QueryCache query_cache;
251 AccelerateDMA accelerate_dma; 218 AccelerateDMA accelerate_dma;
252 FenceManagerOpenGL fence_manager; 219 FenceManagerOpenGL fence_manager;
253 220
254 VideoCommon::Shader::AsyncShaders async_shaders;
255
256 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 221 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
257 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 222 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
258 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; 223 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
@@ -260,7 +225,8 @@ private:
260 std::array<GLuint, MAX_IMAGES> image_handles{}; 225 std::array<GLuint, MAX_IMAGES> image_handles{};
261 226
262 /// Number of commands queued to the OpenGL driver. Resetted on flush. 227 /// Number of commands queued to the OpenGL driver. Resetted on flush.
263 std::size_t num_queued_commands = 0; 228 size_t num_queued_commands = 0;
229 bool has_written_global_memory = false;
264 230
265 u32 last_clip_distance_mask = 0; 231 u32 last_clip_distance_mask = 0;
266}; 232};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3428e5e21..8695c29e3 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -83,18 +83,6 @@ void OGLSampler::Release() {
83 handle = 0; 83 handle = 0;
84} 84}
85 85
86void OGLShader::Create(std::string_view source, GLenum type) {
87 if (handle != 0) {
88 return;
89 }
90 if (source.empty()) {
91 return;
92 }
93
94 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
95 handle = GLShader::LoadShader(source, type);
96}
97
98void OGLShader::Release() { 86void OGLShader::Release() {
99 if (handle == 0) 87 if (handle == 0)
100 return; 88 return;
@@ -104,21 +92,6 @@ void OGLShader::Release() {
104 handle = 0; 92 handle = 0;
105} 93}
106 94
107void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
108 const char* frag_shader, bool separable_program,
109 bool hint_retrievable) {
110 OGLShader vert, geo, frag;
111 if (vert_shader)
112 vert.Create(vert_shader, GL_VERTEX_SHADER);
113 if (geo_shader)
114 geo.Create(geo_shader, GL_GEOMETRY_SHADER);
115 if (frag_shader)
116 frag.Create(frag_shader, GL_FRAGMENT_SHADER);
117
118 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
119 Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
120}
121
122void OGLProgram::Release() { 95void OGLProgram::Release() {
123 if (handle == 0) 96 if (handle == 0)
124 return; 97 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 552d79db4..b2d5bfd3b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
8#include <utility> 8#include <utility>
9#include <glad/glad.h> 9#include <glad/glad.h>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_opengl/gl_shader_util.h"
12 11
13namespace OpenGL { 12namespace OpenGL {
14 13
@@ -128,8 +127,6 @@ public:
128 return *this; 127 return *this;
129 } 128 }
130 129
131 void Create(std::string_view source, GLenum type);
132
133 void Release(); 130 void Release();
134 131
135 GLuint handle = 0; 132 GLuint handle = 0;
@@ -151,17 +148,6 @@ public:
151 return *this; 148 return *this;
152 } 149 }
153 150
154 template <typename... T>
155 void Create(bool separable_program, bool hint_retrievable, T... shaders) {
156 if (handle != 0)
157 return;
158 handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
159 }
160
161 /// Creates a new internal OpenGL resource and stores the handle
162 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
163 bool separable_program = false, bool hint_retrievable = false);
164
165 /// Deletes the internal OpenGL resource 151 /// Deletes the internal OpenGL resource
166 void Release(); 152 void Release();
167 153
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..8d6cc074c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,606 +3,544 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic> 5#include <atomic>
6#include <fstream>
6#include <functional> 7#include <functional>
7#include <mutex> 8#include <mutex>
8#include <optional>
9#include <string> 9#include <string>
10#include <thread> 10#include <thread>
11#include <unordered_set>
12 11
13#include "common/alignment.h" 12#include "common/alignment.h"
14#include "common/assert.h" 13#include "common/assert.h"
14#include "common/fs/fs.h"
15#include "common/fs/path_util.h"
15#include "common/logging/log.h" 16#include "common/logging/log.h"
16#include "common/scope_exit.h" 17#include "common/scope_exit.h"
18#include "common/settings.h"
19#include "common/thread_worker.h"
17#include "core/core.h" 20#include "core/core.h"
18#include "core/frontend/emu_window.h" 21#include "shader_recompiler/backend/glasm/emit_glasm.h"
22#include "shader_recompiler/backend/glsl/emit_glsl.h"
23#include "shader_recompiler/backend/spirv/emit_spirv.h"
24#include "shader_recompiler/frontend/ir/program.h"
25#include "shader_recompiler/frontend/maxwell/control_flow.h"
26#include "shader_recompiler/frontend/maxwell/translate_program.h"
27#include "shader_recompiler/profile.h"
19#include "video_core/engines/kepler_compute.h" 28#include "video_core/engines/kepler_compute.h"
20#include "video_core/engines/maxwell_3d.h" 29#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 30#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 31#include "video_core/renderer_opengl/gl_rasterizer.h"
25#include "video_core/renderer_opengl/gl_resource_manager.h" 32#include "video_core/renderer_opengl/gl_resource_manager.h"
26#include "video_core/renderer_opengl/gl_shader_cache.h" 33#include "video_core/renderer_opengl/gl_shader_cache.h"
27#include "video_core/renderer_opengl/gl_shader_decompiler.h" 34#include "video_core/renderer_opengl/gl_shader_util.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 35#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/shader/memory_util.h"
31#include "video_core/shader/registry.h"
32#include "video_core/shader/shader_ir.h"
33#include "video_core/shader_cache.h" 36#include "video_core/shader_cache.h"
37#include "video_core/shader_environment.h"
34#include "video_core/shader_notify.h" 38#include "video_core/shader_notify.h"
35 39
36namespace OpenGL { 40namespace OpenGL {
37
38using Tegra::Engines::ShaderType;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::GetUniqueIdentifier;
42using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
43using VideoCommon::Shader::ProgramCode;
44using VideoCommon::Shader::Registry;
45using VideoCommon::Shader::ShaderIR;
46using VideoCommon::Shader::STAGE_MAIN_OFFSET;
47
48namespace { 41namespace {
49 42using Shader::Backend::GLASM::EmitGLASM;
50constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; 43using Shader::Backend::GLSL::EmitGLSL;
51 44using Shader::Backend::SPIRV::EmitSPIRV;
52/// Gets the shader type from a Maxwell program type 45using Shader::Maxwell::MergeDualVertexPrograms;
53constexpr GLenum GetGLShaderType(ShaderType shader_type) { 46using Shader::Maxwell::TranslateProgram;
54 switch (shader_type) { 47using VideoCommon::ComputeEnvironment;
55 case ShaderType::Vertex: 48using VideoCommon::FileEnvironment;
56 return GL_VERTEX_SHADER; 49using VideoCommon::GenericEnvironment;
57 case ShaderType::Geometry: 50using VideoCommon::GraphicsEnvironment;
58 return GL_GEOMETRY_SHADER; 51using VideoCommon::LoadPipelines;
59 case ShaderType::Fragment: 52using VideoCommon::SerializePipeline;
60 return GL_FRAGMENT_SHADER; 53using Context = ShaderContext::Context;
61 case ShaderType::Compute: 54
62 return GL_COMPUTE_SHADER; 55constexpr u32 CACHE_VERSION = 5;
63 default: 56
64 return GL_NONE; 57template <typename Container>
65 } 58auto MakeSpan(Container& container) {
59 return std::span(container.data(), container.size());
66} 60}
67 61
68constexpr const char* GetShaderTypeName(ShaderType shader_type) { 62Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
69 switch (shader_type) { 63 const Shader::IR::Program& program,
70 case ShaderType::Vertex: 64 const Shader::IR::Program* previous_program,
71 return "VS"; 65 bool glasm_use_storage_buffers, bool use_assembly_shaders) {
72 case ShaderType::TesselationControl: 66 Shader::RuntimeInfo info;
73 return "HS"; 67 if (previous_program) {
74 case ShaderType::TesselationEval: 68 info.previous_stage_stores = previous_program->info.stores;
75 return "DS"; 69 } else {
76 case ShaderType::Geometry: 70 // Mark all stores as available for vertex shaders
77 return "GS"; 71 info.previous_stage_stores.mask.set();
78 case ShaderType::Fragment: 72 }
79 return "FS"; 73 switch (program.stage) {
80 case ShaderType::Compute: 74 case Shader::Stage::VertexB:
81 return "CS"; 75 case Shader::Stage::Geometry:
82 } 76 if (!use_assembly_shaders && key.xfb_enabled != 0) {
83 return "UNK"; 77 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
78 }
79 break;
80 case Shader::Stage::TessellationEval:
81 info.tess_clockwise = key.tessellation_clockwise != 0;
82 info.tess_primitive = [&key] {
83 switch (key.tessellation_primitive) {
84 case Maxwell::TessellationPrimitive::Isolines:
85 return Shader::TessPrimitive::Isolines;
86 case Maxwell::TessellationPrimitive::Triangles:
87 return Shader::TessPrimitive::Triangles;
88 case Maxwell::TessellationPrimitive::Quads:
89 return Shader::TessPrimitive::Quads;
90 }
91 UNREACHABLE();
92 return Shader::TessPrimitive::Triangles;
93 }();
94 info.tess_spacing = [&] {
95 switch (key.tessellation_spacing) {
96 case Maxwell::TessellationSpacing::Equal:
97 return Shader::TessSpacing::Equal;
98 case Maxwell::TessellationSpacing::FractionalOdd:
99 return Shader::TessSpacing::FractionalOdd;
100 case Maxwell::TessellationSpacing::FractionalEven:
101 return Shader::TessSpacing::FractionalEven;
102 }
103 UNREACHABLE();
104 return Shader::TessSpacing::Equal;
105 }();
106 break;
107 case Shader::Stage::Fragment:
108 info.force_early_z = key.early_z != 0;
109 break;
110 default:
111 break;
112 }
113 switch (key.gs_input_topology) {
114 case Maxwell::PrimitiveTopology::Points:
115 info.input_topology = Shader::InputTopology::Points;
116 break;
117 case Maxwell::PrimitiveTopology::Lines:
118 case Maxwell::PrimitiveTopology::LineLoop:
119 case Maxwell::PrimitiveTopology::LineStrip:
120 info.input_topology = Shader::InputTopology::Lines;
121 break;
122 case Maxwell::PrimitiveTopology::Triangles:
123 case Maxwell::PrimitiveTopology::TriangleStrip:
124 case Maxwell::PrimitiveTopology::TriangleFan:
125 case Maxwell::PrimitiveTopology::Quads:
126 case Maxwell::PrimitiveTopology::QuadStrip:
127 case Maxwell::PrimitiveTopology::Polygon:
128 case Maxwell::PrimitiveTopology::Patches:
129 info.input_topology = Shader::InputTopology::Triangles;
130 break;
131 case Maxwell::PrimitiveTopology::LinesAdjacency:
132 case Maxwell::PrimitiveTopology::LineStripAdjacency:
133 info.input_topology = Shader::InputTopology::LinesAdjacency;
134 break;
135 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
136 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
137 info.input_topology = Shader::InputTopology::TrianglesAdjacency;
138 break;
139 }
140 info.glasm_use_storage_buffers = glasm_use_storage_buffers;
141 return info;
84} 142}
85 143
86constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { 144void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
87 switch (program_type) { 145 std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
88 case Maxwell::ShaderProgram::VertexA: 146 return VideoCommon::TransformFeedbackState::Layout{
89 case Maxwell::ShaderProgram::VertexB: 147 .stream = layout.stream,
90 return ShaderType::Vertex; 148 .varying_count = layout.varying_count,
91 case Maxwell::ShaderProgram::TesselationControl: 149 .stride = layout.stride,
92 return ShaderType::TesselationControl; 150 };
93 case Maxwell::ShaderProgram::TesselationEval: 151 });
94 return ShaderType::TesselationEval; 152 state.varyings = regs.tfb_varying_locs;
95 case Maxwell::ShaderProgram::Geometry:
96 return ShaderType::Geometry;
97 case Maxwell::ShaderProgram::Fragment:
98 return ShaderType::Fragment;
99 }
100 return {};
101} 153}
154} // Anonymous namespace
102 155
103constexpr GLenum AssemblyEnum(ShaderType shader_type) { 156ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
104 switch (shader_type) { 157 Tegra::Engines::Maxwell3D& maxwell3d_,
105 case ShaderType::Vertex: 158 Tegra::Engines::KeplerCompute& kepler_compute_,
106 return GL_VERTEX_PROGRAM_NV; 159 Tegra::MemoryManager& gpu_memory_, const Device& device_,
107 case ShaderType::TesselationControl: 160 TextureCache& texture_cache_, BufferCache& buffer_cache_,
108 return GL_TESS_CONTROL_PROGRAM_NV; 161 ProgramManager& program_manager_, StateTracker& state_tracker_,
109 case ShaderType::TesselationEval: 162 VideoCore::ShaderNotify& shader_notify_)
110 return GL_TESS_EVALUATION_PROGRAM_NV; 163 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
111 case ShaderType::Geometry: 164 emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
112 return GL_GEOMETRY_PROGRAM_NV; 165 buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
113 case ShaderType::Fragment: 166 shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
114 return GL_FRAGMENT_PROGRAM_NV; 167 profile{
115 case ShaderType::Compute: 168 .supported_spirv = 0x00010000,
116 return GL_COMPUTE_PROGRAM_NV; 169
170 .unified_descriptor_binding = false,
171 .support_descriptor_aliasing = false,
172 .support_int8 = false,
173 .support_int16 = false,
174 .support_int64 = device.HasShaderInt64(),
175 .support_vertex_instance_id = true,
176 .support_float_controls = false,
177 .support_separate_denorm_behavior = false,
178 .support_separate_rounding_mode = false,
179 .support_fp16_denorm_preserve = false,
180 .support_fp32_denorm_preserve = false,
181 .support_fp16_denorm_flush = false,
182 .support_fp32_denorm_flush = false,
183 .support_fp16_signed_zero_nan_preserve = false,
184 .support_fp32_signed_zero_nan_preserve = false,
185 .support_fp64_signed_zero_nan_preserve = false,
186 .support_explicit_workgroup_layout = false,
187 .support_vote = true,
188 .support_viewport_index_layer_non_geometry =
189 device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
190 .support_viewport_mask = device.HasNvViewportArray2(),
191 .support_typeless_image_loads = device.HasImageLoadFormatted(),
192 .support_demote_to_helper_invocation = false,
193 .support_int64_atomics = false,
194 .support_derivative_control = device.HasDerivativeControl(),
195 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
196 .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
197 .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
198 .support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
199 .support_gl_warp_intrinsics = false,
200 .support_gl_variable_aoffi = device.HasVariableAoffi(),
201 .support_gl_sparse_textures = device.HasSparseTexture2(),
202 .support_gl_derivative_control = device.HasDerivativeControl(),
203
204 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
205
206 .lower_left_origin_mode = true,
207 .need_declared_frag_colors = true,
208 .need_fastmath_off = device.NeedsFastmathOff(),
209
210 .has_broken_spirv_clamp = true,
211 .has_broken_unsigned_image_offsets = true,
212 .has_broken_signed_operations = true,
213 .has_broken_fp16_float_controls = false,
214 .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
215 .has_gl_precise_bug = device.HasPreciseBug(),
216 .ignore_nan_fp_comparisons = true,
217 .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
218 },
219 host_info{
220 .support_float16 = false,
221 .support_int64 = device.HasShaderInt64(),
222 } {
223 if (use_asynchronous_shaders) {
224 workers = CreateWorkers();
117 } 225 }
118 return {};
119} 226}
120 227
121std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { 228ShaderCache::~ShaderCache() = default;
122 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
123}
124 229
125std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { 230void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
126 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; 231 const VideoCore::DiskResourceLoadCallback& callback) {
127 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, 232 if (title_id == 0) {
128 entry.graphics_info, entry.compute_info}; 233 return;
129 auto registry = std::make_shared<Registry>(entry.type, info);
130 for (const auto& [address, value] : entry.keys) {
131 const auto [buffer, offset] = address;
132 registry->InsertKey(buffer, offset, value);
133 }
134 for (const auto& [offset, sampler] : entry.bound_samplers) {
135 registry->InsertBoundSampler(offset, sampler);
136 } 234 }
137 for (const auto& [key, sampler] : entry.bindless_samplers) { 235 const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
138 const auto [buffer, offset] = key; 236 const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
139 registry->InsertBindlessSampler(buffer, offset, sampler); 237 if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
238 LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories");
239 return;
140 } 240 }
141 return registry; 241 shader_cache_filename = base_dir / "opengl.bin";
142} 242
143 243 if (!workers) {
144std::unordered_set<GLenum> GetSupportedFormats() { 244 workers = CreateWorkers();
145 GLint num_formats; 245 }
146 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 246 struct {
247 std::mutex mutex;
248 size_t total{};
249 size_t built{};
250 bool has_loaded{};
251 } state;
252
253 const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
254 ComputePipelineKey key;
255 file.read(reinterpret_cast<char*>(&key), sizeof(key));
256 workers->QueueWork(
257 [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
258 ctx->pools.ReleaseContents();
259 auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
260 std::lock_guard lock{state.mutex};
261 if (pipeline) {
262 compute_cache.emplace(key, std::move(pipeline));
263 }
264 ++state.built;
265 if (state.has_loaded) {
266 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
267 }
268 });
269 ++state.total;
270 }};
271 const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
272 GraphicsPipelineKey key;
273 file.read(reinterpret_cast<char*>(&key), sizeof(key));
274 workers->QueueWork(
275 [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
276 boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
277 for (auto& env : envs) {
278 env_ptrs.push_back(&env);
279 }
280 ctx->pools.ReleaseContents();
281 auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
282 std::lock_guard lock{state.mutex};
283 if (pipeline) {
284 graphics_cache.emplace(key, std::move(pipeline));
285 }
286 ++state.built;
287 if (state.has_loaded) {
288 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
289 }
290 });
291 ++state.total;
292 }};
293 LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);
147 294
148 std::vector<GLint> formats(num_formats); 295 std::unique_lock lock{state.mutex};
149 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); 296 callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
297 state.has_loaded = true;
298 lock.unlock();
150 299
151 std::unordered_set<GLenum> supported_formats; 300 workers->WaitForRequests();
152 for (const GLint format : formats) { 301 if (!use_asynchronous_shaders) {
153 supported_formats.insert(static_cast<GLenum>(format)); 302 workers.reset();
154 } 303 }
155 return supported_formats;
156} 304}
157 305
158} // Anonymous namespace 306GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
159 307 if (!RefreshStages(graphics_key.unique_hashes)) {
160ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, 308 current_pipeline = nullptr;
161 const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { 309 return nullptr;
162 if (device.UseDriverCache()) { 310 }
163 // Ignore hint retrievable if we are using the driver cache 311 const auto& regs{maxwell3d.regs};
164 hint_retrievable = false; 312 graphics_key.raw = 0;
165 } 313 graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
166 const std::string shader_id = MakeShaderID(unique_identifier, shader_type); 314 graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
167 LOG_INFO(Render_OpenGL, "{}", shader_id); 315 ? regs.draw.topology.Value()
168 316 : Maxwell::PrimitiveTopology{});
169 auto program = std::make_shared<ProgramHandle>(); 317 graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
170 318 graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
171 if (device.UseAssemblyShaders()) { 319 graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
172 const std::string arb = 320 graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0);
173 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); 321 if (graphics_key.xfb_enabled) {
174 322 SetXfbState(graphics_key.xfb_state, regs);
175 GLuint& arb_prog = program->assembly_program.handle; 323 }
176 324 if (current_pipeline && graphics_key == current_pipeline->Key()) {
177// Commented out functions signal OpenGL errors but are compatible with apitrace. 325 return BuiltPipeline(current_pipeline);
178// Use them only to capture and replay on apitrace. 326 }
179#if 0 327 return CurrentGraphicsPipelineSlowPath();
180 glGenProgramsNV(1, &arb_prog);
181 glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
182 reinterpret_cast<const GLubyte*>(arb.data()));
183#else
184 glGenProgramsARB(1, &arb_prog);
185 glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
186 static_cast<GLsizei>(arb.size()), arb.data());
187#endif
188 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
189 if (err && *err) {
190 LOG_CRITICAL(Render_OpenGL, "{}", err);
191 LOG_INFO(Render_OpenGL, "\n{}", arb);
192 }
193 } else {
194 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
195 OGLShader shader;
196 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
197
198 program->source_program.Create(true, hint_retrievable, shader.handle);
199 }
200
201 return program;
202} 328}
203 329
204Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, 330GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() {
205 ProgramSharedPtr program_, bool is_built_) 331 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
206 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, 332 auto& pipeline{pair->second};
207 is_built{is_built_} { 333 if (is_new) {
208 handle = program->assembly_program.handle; 334 pipeline = CreateGraphicsPipeline();
209 if (handle == 0) {
210 handle = program->source_program.handle;
211 } 335 }
212 if (is_built) { 336 if (!pipeline) {
213 ASSERT(handle != 0); 337 return nullptr;
214 } 338 }
339 current_pipeline = pipeline.get();
340 return BuiltPipeline(current_pipeline);
215} 341}
216 342
217Shader::~Shader() = default; 343GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
218 344 if (pipeline->IsBuilt()) {
219GLuint Shader::GetHandle() const { 345 return pipeline;
220 DEBUG_ASSERT(registry->IsConsistent());
221 return handle;
222}
223
224bool Shader::IsBuilt() const {
225 return is_built;
226}
227
228void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
229 program->source_program = std::move(new_program);
230 handle = program->source_program.handle;
231 is_built = true;
232}
233
234void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
235 program->assembly_program = std::move(new_program);
236 handle = program->assembly_program.handle;
237 is_built = true;
238}
239
240std::unique_ptr<Shader> Shader::CreateStageFromMemory(
241 const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
242 ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
243 const auto shader_type = GetShaderType(program_type);
244
245 auto& gpu = params.gpu;
246 gpu.ShaderNotify().MarkSharderBuilding();
247
248 auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
249 if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
250 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
251 // TODO(Rodrigo): Handle VertexA shaders
252 // std::optional<ShaderIR> ir_b;
253 // if (!code_b.empty()) {
254 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
255 // }
256 auto program =
257 BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
258 ShaderDiskCacheEntry entry;
259 entry.type = shader_type;
260 entry.code = std::move(code);
261 entry.code_b = std::move(code_b);
262 entry.unique_identifier = params.unique_identifier;
263 entry.bound_buffer = registry->GetBoundBuffer();
264 entry.graphics_info = registry->GetGraphicsInfo();
265 entry.keys = registry->GetKeys();
266 entry.bound_samplers = registry->GetBoundSamplers();
267 entry.bindless_samplers = registry->GetBindlessSamplers();
268 params.disk_cache.SaveEntry(std::move(entry));
269
270 gpu.ShaderNotify().MarkShaderComplete();
271
272 return std::unique_ptr<Shader>(new Shader(std::move(registry),
273 MakeEntries(params.device, ir, shader_type),
274 std::move(program), true));
275 } else {
276 // Required for entries
277 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
278 auto entries = MakeEntries(params.device, ir, shader_type);
279
280 async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
281 std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
282 COMPILER_SETTINGS, *registry, cpu_addr);
283
284 auto program = std::make_shared<ProgramHandle>();
285 return std::unique_ptr<Shader>(
286 new Shader(std::move(registry), std::move(entries), std::move(program), false));
287 } 346 }
288} 347 if (!use_asynchronous_shaders) {
289 348 return pipeline;
290std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
291 ProgramCode code) {
292 auto& gpu = params.gpu;
293 gpu.ShaderNotify().MarkSharderBuilding();
294
295 auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
296 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
297 const u64 uid = params.unique_identifier;
298 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
299
300 ShaderDiskCacheEntry entry;
301 entry.type = ShaderType::Compute;
302 entry.code = std::move(code);
303 entry.unique_identifier = uid;
304 entry.bound_buffer = registry->GetBoundBuffer();
305 entry.compute_info = registry->GetComputeInfo();
306 entry.keys = registry->GetKeys();
307 entry.bound_samplers = registry->GetBoundSamplers();
308 entry.bindless_samplers = registry->GetBindlessSamplers();
309 params.disk_cache.SaveEntry(std::move(entry));
310
311 gpu.ShaderNotify().MarkShaderComplete();
312
313 return std::unique_ptr<Shader>(new Shader(std::move(registry),
314 MakeEntries(params.device, ir, ShaderType::Compute),
315 std::move(program)));
316}
317
318std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
319 const PrecompiledShader& precompiled_shader) {
320 return std::unique_ptr<Shader>(new Shader(
321 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
322}
323
324ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
325 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
326 Tegra::Engines::Maxwell3D& maxwell3d_,
327 Tegra::Engines::KeplerCompute& kepler_compute_,
328 Tegra::MemoryManager& gpu_memory_, const Device& device_)
329 : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
330 maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
331
332ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
333
334void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
335 const VideoCore::DiskResourceLoadCallback& callback) {
336 disk_cache.BindTitleID(title_id);
337 const std::optional transferable = disk_cache.LoadTransferable();
338
339 LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
340 transferable.has_value() ? transferable->size() : 0);
341
342 if (!transferable) {
343 return;
344 } 349 }
345 350 // If something is using depth, we can assume that games are not rendering anything which
346 std::vector<ShaderDiskCachePrecompiled> gl_cache; 351 // will be used one time.
347 if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { 352 if (maxwell3d.regs.zeta_enable) {
348 // Only load precompiled cache when we are not using assembly shaders 353 return nullptr;
349 gl_cache = disk_cache.LoadPrecompiled();
350 } 354 }
351 const auto supported_formats = GetSupportedFormats(); 355 // If games are using a small index count, we can assume these are full screen quads.
352 356 // Usually these shaders are only used once for building textures so we can assume they
353 // Track if precompiled cache was altered during loading to know if we have to 357 // can't be built async
354 // serialize the virtual precompiled cache file back to the hard drive 358 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
355 bool precompiled_cache_altered = false; 359 return pipeline;
356
357 // Inform the frontend about shader build initialization
358 if (callback) {
359 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
360 } 360 }
361 return nullptr;
362}
361 363
362 std::mutex mutex; 364ComputePipeline* ShaderCache::CurrentComputePipeline() {
363 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex 365 const VideoCommon::ShaderInfo* const shader{ComputeShader()};
364 std::atomic_bool gl_cache_failed = false; 366 if (!shader) {
365 367 return nullptr;
366 const auto find_precompiled = [&gl_cache](u64 id) {
367 return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
368 };
369
370 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
371 std::size_t end) {
372 const auto scope = context->Acquire();
373
374 for (std::size_t i = begin; i < end; ++i) {
375 if (stop_loading.stop_requested()) {
376 return;
377 }
378 const auto& entry = (*transferable)[i];
379 const u64 uid = entry.unique_identifier;
380 const auto it = find_precompiled(uid);
381 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
382
383 const bool is_compute = entry.type == ShaderType::Compute;
384 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
385 auto registry = MakeRegistry(entry);
386 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
387
388 ProgramSharedPtr program;
389 if (precompiled_entry) {
390 // If the shader is precompiled, attempt to load it with
391 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
392 if (!program) {
393 gl_cache_failed = true;
394 }
395 }
396 if (!program) {
397 // Otherwise compile it from GLSL
398 program = BuildShader(device, entry.type, uid, ir, *registry, true);
399 }
400
401 PrecompiledShader shader;
402 shader.program = std::move(program);
403 shader.registry = std::move(registry);
404 shader.entries = MakeEntries(device, ir, entry.type);
405
406 std::scoped_lock lock{mutex};
407 if (callback) {
408 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
409 transferable->size());
410 }
411 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
412 }
413 };
414
415 const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
416 const std::size_t bucket_size{transferable->size() / num_workers};
417 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
418 std::vector<std::thread> threads(num_workers);
419 for (std::size_t i = 0; i < num_workers; ++i) {
420 const bool is_last_worker = i + 1 == num_workers;
421 const std::size_t start{bucket_size * i};
422 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
423
424 // On some platforms the shared context has to be created from the GUI thread
425 contexts[i] = emu_window.CreateSharedContext();
426 threads[i] = std::thread(worker, contexts[i].get(), start, end);
427 } 368 }
428 for (auto& thread : threads) { 369 const auto& qmd{kepler_compute.launch_description};
429 thread.join(); 370 const ComputePipelineKey key{
371 .unique_hash = shader->unique_hash,
372 .shared_memory_size = qmd.shared_alloc,
373 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
374 };
375 const auto [pair, is_new]{compute_cache.try_emplace(key)};
376 auto& pipeline{pair->second};
377 if (!is_new) {
378 return pipeline.get();
430 } 379 }
380 pipeline = CreateComputePipeline(key, shader);
381 return pipeline.get();
382}
431 383
432 if (gl_cache_failed) { 384std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
433 // Invalidate the precompiled cache if a shader dumped shader was rejected 385 GraphicsEnvironments environments;
434 disk_cache.InvalidatePrecompiled(); 386 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
435 precompiled_cache_altered = true;
436 return;
437 }
438 if (stop_loading.stop_requested()) {
439 return;
440 }
441 387
442 if (device.UseAssemblyShaders() || device.UseDriverCache()) { 388 main_pools.ReleaseContents();
443 // Don't store precompiled binaries for assembly shaders or when using the driver cache 389 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(),
444 return; 390 use_asynchronous_shaders)};
391 if (!pipeline || shader_cache_filename.empty()) {
392 return pipeline;
445 } 393 }
446 394 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs;
447 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 395 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
448 // before precompiling them 396 if (graphics_key.unique_hashes[index] != 0) {
449 397 env_ptrs.push_back(&environments.envs[index]);
450 for (std::size_t i = 0; i < transferable->size(); ++i) {
451 const u64 id = (*transferable)[i].unique_identifier;
452 const auto it = find_precompiled(id);
453 if (it == gl_cache.end()) {
454 const GLuint program = runtime_cache.at(id).program->source_program.handle;
455 disk_cache.SavePrecompiled(id, program);
456 precompiled_cache_altered = true;
457 } 398 }
458 } 399 }
459 400 SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION);
460 if (precompiled_cache_altered) { 401 return pipeline;
461 disk_cache.SaveVirtualPrecompiledFile();
462 }
463}
464
465ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
466 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
467 const std::unordered_set<GLenum>& supported_formats) {
468 if (!supported_formats.contains(precompiled_entry.binary_format)) {
469 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
470 return {};
471 }
472
473 auto program = std::make_shared<ProgramHandle>();
474 GLuint& handle = program->source_program.handle;
475 handle = glCreateProgram();
476 glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
477 glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
478 static_cast<GLsizei>(precompiled_entry.binary.size()));
479
480 GLint link_status;
481 glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
482 if (link_status == GL_FALSE) {
483 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
484 return {};
485 }
486
487 return program;
488} 402}
489 403
490Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, 404std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
491 VideoCommon::Shader::AsyncShaders& async_shaders) { 405 ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
492 if (!maxwell3d.dirty.flags[Dirty::Shaders]) { 406 std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
493 auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; 407 LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
494 if (last_shader->IsBuilt()) { 408 size_t env_index{};
495 return last_shader; 409 u32 total_storage_buffers{};
410 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
411 const bool uses_vertex_a{key.unique_hashes[0] != 0};
412 const bool uses_vertex_b{key.unique_hashes[1] != 0};
413 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
414 if (key.unique_hashes[index] == 0) {
415 continue;
496 } 416 }
497 } 417 Shader::Environment& env{*envs[env_index]};
418 ++env_index;
498 419
499 const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; 420 const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
421 Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
422 if (!uses_vertex_a || index != 1) {
423 // Normal path
424 programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
500 425
501 if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { 426 for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
502 auto completed_work = async_shaders.GetCompletedWork(); 427 total_storage_buffers += desc.count;
503 for (auto& work : completed_work) {
504 Shader* shader = TryGet(work.cpu_address);
505 gpu.ShaderNotify().MarkShaderComplete();
506 if (shader == nullptr) {
507 continue;
508 } 428 }
509 using namespace VideoCommon::Shader; 429 } else {
510 if (work.backend == AsyncShaders::Backend::OpenGL) { 430 // VertexB path when VertexA is present.
511 shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); 431 auto& program_va{programs[0]};
512 } else if (work.backend == AsyncShaders::Backend::GLASM) { 432 auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
513 shader->AsyncGLASMBuilt(std::move(work.program.glasm)); 433 for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
434 total_storage_buffers += desc.count;
514 } 435 }
515 436 programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
516 auto& registry = shader->GetRegistry();
517
518 ShaderDiskCacheEntry entry;
519 entry.type = work.shader_type;
520 entry.code = std::move(work.code);
521 entry.code_b = std::move(work.code_b);
522 entry.unique_identifier = work.uid;
523 entry.bound_buffer = registry.GetBoundBuffer();
524 entry.graphics_info = registry.GetGraphicsInfo();
525 entry.keys = registry.GetKeys();
526 entry.bound_samplers = registry.GetBoundSamplers();
527 entry.bindless_samplers = registry.GetBindlessSamplers();
528 disk_cache.SaveEntry(std::move(entry));
529 } 437 }
530 } 438 }
531 439 const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
532 // Look up shader in the cache based on address 440 const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
533 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; 441
534 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { 442 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
535 return last_shaders[static_cast<std::size_t>(program)] = shader; 443
536 } 444 OGLProgram source_program;
537 445 std::array<std::string, 5> sources;
538 const u8* const host_ptr{gpu_memory.GetPointer(address)}; 446 std::array<std::vector<u32>, 5> sources_spirv;
539 447 Shader::Backend::Bindings binding;
540 // No shader found - create a new one 448 Shader::IR::Program* previous_program{};
541 ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; 449 const bool use_glasm{device.UseAssemblyShaders()};
542 ProgramCode code_b; 450 const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
543 if (program == Maxwell::ShaderProgram::VertexA) { 451 for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
544 const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; 452 if (key.unique_hashes[index] == 0) {
545 const u8* host_ptr_b = gpu_memory.GetPointer(address_b); 453 continue;
546 code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); 454 }
547 } 455 UNIMPLEMENTED_IF(index == 0);
548 const std::size_t code_size = code.size() * sizeof(u64); 456
549 457 Shader::IR::Program& program{programs[index]};
550 const u64 unique_identifier = GetUniqueIdentifier( 458 const size_t stage_index{index - 1};
551 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 459 infos[stage_index] = &program.info;
552 460
553 const ShaderParameters params{gpu, maxwell3d, disk_cache, device, 461 const auto runtime_info{
554 *cpu_addr, host_ptr, unique_identifier}; 462 MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
555 463 switch (device.GetShaderBackend()) {
556 std::unique_ptr<Shader> shader; 464 case Settings::ShaderBackend::GLSL:
557 const auto found = runtime_cache.find(unique_identifier); 465 sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
558 if (found == runtime_cache.end()) { 466 break;
559 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), 467 case Settings::ShaderBackend::GLASM:
560 async_shaders, cpu_addr.value_or(0)); 468 sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
561 } else { 469 break;
562 shader = Shader::CreateFromCache(params, found->second); 470 case Settings::ShaderBackend::SPIRV:
563 } 471 sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
564 472 break;
565 Shader* const result = shader.get(); 473 }
566 if (cpu_addr) { 474 previous_program = &program;
567 Register(std::move(shader), *cpu_addr, code_size);
568 } else {
569 null_shader = std::move(shader);
570 } 475 }
476 auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
477 return std::make_unique<GraphicsPipeline>(
478 device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
479 thread_worker, &shader_notify, sources, sources_spirv, infos, key);
571 480
572 return last_shaders[static_cast<std::size_t>(program)] = result; 481} catch (Shader::Exception& exception) {
482 LOG_ERROR(Render_OpenGL, "{}", exception.what());
483 return nullptr;
573} 484}
574 485
575Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 486std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
576 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; 487 const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
577 488 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
578 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { 489 const auto& qmd{kepler_compute.launch_description};
579 return kernel; 490 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
580 } 491 env.SetCachedSize(shader->size_bytes);
581 492
582 // No kernel found, create a new one 493 main_pools.ReleaseContents();
583 const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; 494 auto pipeline{CreateComputePipeline(main_pools, key, env)};
584 ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; 495 if (!pipeline || shader_cache_filename.empty()) {
585 const std::size_t code_size{code.size() * sizeof(u64)}; 496 return pipeline;
586 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 497 }
587 498 SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename,
588 const ShaderParameters params{gpu, kepler_compute, disk_cache, device, 499 CACHE_VERSION);
589 *cpu_addr, host_ptr, unique_identifier}; 500 return pipeline;
501}
590 502
591 std::unique_ptr<Shader> kernel; 503std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
592 const auto found = runtime_cache.find(unique_identifier); 504 ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
593 if (found == runtime_cache.end()) { 505 Shader::Environment& env) try {
594 kernel = Shader::CreateKernelFromMemory(params, std::move(code)); 506 LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
595 } else { 507
596 kernel = Shader::CreateFromCache(params, found->second); 508 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
597 } 509 auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
510
511 u32 num_storage_buffers{};
512 for (const auto& desc : program.info.storage_buffers_descriptors) {
513 num_storage_buffers += desc.count;
514 }
515 Shader::RuntimeInfo info;
516 info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
517
518 std::string code{};
519 std::vector<u32> code_spirv;
520 switch (device.GetShaderBackend()) {
521 case Settings::ShaderBackend::GLSL:
522 code = EmitGLSL(profile, program);
523 break;
524 case Settings::ShaderBackend::GLASM:
525 code = EmitGLASM(profile, info, program);
526 break;
527 case Settings::ShaderBackend::SPIRV:
528 code_spirv = EmitSPIRV(profile, program);
529 break;
530 }
531
532 return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
533 kepler_compute, program_manager, program.info, code,
534 code_spirv);
535} catch (Shader::Exception& exception) {
536 LOG_ERROR(Render_OpenGL, "{}", exception.what());
537 return nullptr;
538}
598 539
599 Shader* const result = kernel.get(); 540std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
600 if (cpu_addr) { 541 return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
601 Register(std::move(kernel), *cpu_addr, code_size); 542 "yuzu:ShaderBuilder",
602 } else { 543 [this] { return Context{emu_window}; });
603 null_kernel = std::move(kernel);
604 }
605 return result;
606} 544}
607 545
608} // namespace OpenGL 546} // namespace OpenGL
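Note: the rewritten gl_shader_cache.cpp above keys graphics and compute pipelines by a hashed key and builds each pipeline at most once through unordered_map::try_emplace (see CurrentComputePipeline and CurrentGraphicsPipelineSlowPath). The following is a minimal, self-contained sketch of that lookup pattern only; PipelineKey, Pipeline and PipelineCache are hypothetical stand-ins for illustration, not the actual yuzu types or implementation.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <memory>
#include <unordered_map>

// Hypothetical stand-ins for a pipeline key and pipeline object.
struct PipelineKey {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;

    bool operator==(const PipelineKey&) const = default;
};

struct PipelineKeyHash {
    std::size_t operator()(const PipelineKey& k) const noexcept {
        return std::hash<std::uint64_t>{}(k.unique_hash ^
                                          (std::uint64_t{k.shared_memory_size} << 32));
    }
};

struct Pipeline {
    explicit Pipeline(PipelineKey key_) : key{key_} {}
    PipelineKey key;
};

class PipelineCache {
public:
    // Returns the cached pipeline for 'key', building it only on the first request.
    Pipeline* Get(const PipelineKey& key) {
        const auto [it, is_new] = cache.try_emplace(key);
        auto& pipeline = it->second;
        if (is_new) {
            // Build once per key; if building failed, the null entry would stay cached
            // so later lookups return nullptr instead of retrying.
            pipeline = Build(key);
        }
        return pipeline.get();
    }

private:
    std::unique_ptr<Pipeline> Build(const PipelineKey& key) {
        return std::make_unique<Pipeline>(key);
    }

    std::unordered_map<PipelineKey, std::unique_ptr<Pipeline>, PipelineKeyHash> cache;
};

int main() {
    PipelineCache cache;
    const PipelineKey key{.unique_hash = 0xabcdef, .shared_memory_size = 0x100};
    Pipeline* first = cache.Get(key);
    Pipeline* second = cache.Get(key); // cache hit, no rebuild
    std::printf("same object: %d\n", first == second);
}

The keep-the-null-entry behaviour matches the code above: CreateGraphicsPipeline catches Shader::Exception and returns nullptr, and CurrentGraphicsPipelineSlowPath then returns nullptr for that key on every subsequent lookup rather than retrying the build.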
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..a34110b37 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,157 +5,93 @@
 #pragma once
 
 #include <array>
-#include <atomic>
-#include <bitset>
-#include <memory>
-#include <string>
-#include <tuple>
+#include <filesystem>
+#include <stop_token>
 #include <unordered_map>
-#include <unordered_set>
-#include <vector>
 
 #include <glad/glad.h>
 
 #include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_context.h"
 #include "video_core/shader_cache.h"
 
 namespace Tegra {
 class MemoryManager;
 }
 
-namespace Core::Frontend {
-class EmuWindow;
-}
-
-namespace VideoCommon::Shader {
-class AsyncShaders;
-}
-
 namespace OpenGL {
 
 class Device;
+class ProgramManager;
 class RasterizerOpenGL;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
 
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct ProgramHandle {
-    OGLProgram source_program;
-    OGLAssemblyProgram assembly_program;
-};
-using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
-
-struct PrecompiledShader {
-    ProgramSharedPtr program;
-    std::shared_ptr<VideoCommon::Shader::Registry> registry;
-    ShaderEntries entries;
-};
-
-struct ShaderParameters {
-    Tegra::GPU& gpu;
-    Tegra::Engines::ConstBufferEngineInterface& engine;
-    ShaderDiskCacheOpenGL& disk_cache;
-    const Device& device;
-    VAddr cpu_addr;
-    const u8* host_ptr;
-    u64 unique_identifier;
-};
-
-ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
-                             u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
-                             const VideoCommon::Shader::Registry& registry,
-                             bool hint_retrievable = false);
-
-class Shader final {
+class ShaderCache : public VideoCommon::ShaderCache {
 public:
-    ~Shader();
-
-    /// Gets the GL program handle for the shader
-    GLuint GetHandle() const;
-
-    bool IsBuilt() const;
-
-    /// Gets the shader entries for the shader
-    const ShaderEntries& GetEntries() const {
-        return entries;
-    }
-
-    const VideoCommon::Shader::Registry& GetRegistry() const {
-        return *registry;
-    }
-
-    /// Mark a OpenGL shader as built
-    void AsyncOpenGLBuilt(OGLProgram new_program);
-
-    /// Mark a GLASM shader as built
-    void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
-
-    static std::unique_ptr<Shader> CreateStageFromMemory(
-        const ShaderParameters& params, Maxwell::ShaderProgram program_type,
-        ProgramCode program_code, ProgramCode program_code_b,
-        VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
-
-    static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
-                                                          ProgramCode code);
-
-    static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
-                                                   const PrecompiledShader& precompiled_shader);
+    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_,
+                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
+                         ProgramManager& program_manager_, StateTracker& state_tracker_,
+                         VideoCore::ShaderNotify& shader_notify_);
+    ~ShaderCache();
+
+    void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+                           const VideoCore::DiskResourceLoadCallback& callback);
+
+    [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
+
+    [[nodiscard]] ComputePipeline* CurrentComputePipeline();
 
 private:
-    explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
-                    ProgramSharedPtr program, bool is_built_ = true);
-
-    std::shared_ptr<VideoCommon::Shader::Registry> registry;
-    ShaderEntries entries;
-    ProgramSharedPtr program;
-    GLuint handle = 0;
-    bool is_built{};
-};
-
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
-public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                               Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
-                               Tegra::Engines::Maxwell3D& maxwell3d_,
-                               Tegra::Engines::KeplerCompute& kepler_compute_,
-                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
-    ~ShaderCacheOpenGL() override;
-
-    /// Loads disk cache for the current game
-    void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
-                       const VideoCore::DiskResourceLoadCallback& callback);
-
-    /// Gets the current specified shader stage program
-    Shader* GetStageProgram(Maxwell::ShaderProgram program,
-                            VideoCommon::Shader::AsyncShaders& async_shaders);
-
-    /// Gets a compute kernel in the passed address
-    Shader* GetComputeKernel(GPUVAddr code_addr);
-
-private:
-    ProgramSharedPtr GeneratePrecompiledProgram(
-        const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
-        const std::unordered_set<GLenum>& supported_formats);
+    GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
+
+    [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
+
+    std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
+
+    std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+        ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+        std::span<Shader::Environment* const> envs, bool build_in_parallel);
+
+    std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
+                                                           const VideoCommon::ShaderInfo* shader);
+
+    std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
+                                                           const ComputePipelineKey& key,
+                                                           Shader::Environment& env);
+
+    std::unique_ptr<ShaderWorker> CreateWorkers() const;
 
     Core::Frontend::EmuWindow& emu_window;
-    Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
     const Device& device;
+    TextureCache& texture_cache;
+    BufferCache& buffer_cache;
+    ProgramManager& program_manager;
+    StateTracker& state_tracker;
+    VideoCore::ShaderNotify& shader_notify;
+    const bool use_asynchronous_shaders;
+
+    GraphicsPipelineKey graphics_key{};
+    GraphicsPipeline* current_pipeline{};
 
-    ShaderDiskCacheOpenGL disk_cache;
-    std::unordered_map<u64, PrecompiledShader> runtime_cache;
+    ShaderContext::ShaderPools main_pools;
+    std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
+    std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
 
-    std::unique_ptr<Shader> null_shader;
-    std::unique_ptr<Shader> null_kernel;
+    Shader::Profile profile;
+    Shader::HostTranslateInfo host_info;
 
-    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+    std::filesystem::path shader_cache_filename;
+    std::unique_ptr<ShaderWorker> workers;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
new file mode 100644
index 000000000..6ff34e5d6
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -0,0 +1,33 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/frontend/emu_window.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/maxwell/control_flow.h"
10
11namespace OpenGL::ShaderContext {
12struct ShaderPools {
13 void ReleaseContents() {
14 flow_block.ReleaseContents();
15 block.ReleaseContents();
16 inst.ReleaseContents();
17 }
18
19 Shader::ObjectPool<Shader::IR::Inst> inst;
20 Shader::ObjectPool<Shader::IR::Block> block;
21 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
22};
23
24struct Context {
25 explicit Context(Core::Frontend::EmuWindow& emu_window)
26 : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
27
28 std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
29 Core::Frontend::GraphicsContext::Scoped scoped;
30 ShaderPools pools;
31};
32
33} // namespace OpenGL::ShaderContext
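Each ShaderWorker thread owns one Context: constructing it creates a shared OpenGL context from the EmuWindow, and the Scoped guard makes that context current for the lifetime of the thread, so translated shaders can be compiled off the main GL thread. A hedged, standalone sketch of the same one-state-object-per-worker pattern (the plain std::thread pool below is only an illustration, not yuzu's Common::StatefulThreadWorker):

// Hypothetical illustration of per-thread worker state; PerThreadState stands in for Context.
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

struct PerThreadState {
    std::string name; // stands in for the shared GL context made current on this thread
};

int main() {
    std::mutex io;
    std::vector<std::thread> workers;
    for (int i = 0; i < 3; ++i) {
        workers.emplace_back([i, &io] {
            // The factory runs once per worker, like [this] { return Context{emu_window}; } above.
            PerThreadState state{"shared-context-" + std::to_string(i)};
            const std::lock_guard lock{io};
            std::cout << "worker " << i << " uses " << state.name << '\n';
        });
    }
    for (auto& t : workers) {
        t.join();
    }
}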
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7#include <string_view>
8#include <utility>
9#include <variant>
10#include <vector>
11
12#include <fmt/format.h>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/common_types.h"
17#include "common/div_ceil.h"
18#include "common/logging/log.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_device.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/shader/ast.h"
25#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader/transform_feedback.h"
28
29namespace OpenGL {
30
31namespace {
32
33using Tegra::Engines::ShaderType;
34using Tegra::Shader::Attribute;
35using Tegra::Shader::Header;
36using Tegra::Shader::IpaInterpMode;
37using Tegra::Shader::IpaMode;
38using Tegra::Shader::IpaSampleMode;
39using Tegra::Shader::PixelImap;
40using Tegra::Shader::Register;
41using Tegra::Shader::TextureType;
42
43using namespace VideoCommon::Shader;
44using namespace std::string_literals;
45
46using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47using Operation = const OperationNode&;
48
49class ASTDecompiler;
50class ExprDecompiler;
51
52enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
53
54constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
55
56constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
57constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
58
59struct TextureOffset {};
60struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63
64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66
67constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80)";
81
82class ShaderWriter final {
83public:
84 void AddExpression(std::string_view text) {
85 DEBUG_ASSERT(scope >= 0);
86 if (!text.empty()) {
87 AppendIndentation();
88 }
89 shader_source += text;
90 }
91
92 // Forwards all arguments directly to libfmt.
93 // Note that all of fmt's formatting requirements must be
94 // obeyed when using this function (e.g. {{ must be used
95 // when printing the literal character '{'. Ditto for }} and '}',
96 // etc).
97 template <typename... Args>
98 void AddLine(std::string_view text, Args&&... args) {
99 AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
100 AddNewLine();
101 }
102
103 void AddNewLine() {
104 DEBUG_ASSERT(scope >= 0);
105 shader_source += '\n';
106 }
107
108 std::string GenerateTemporary() {
109 return fmt::format("tmp{}", temporary_index++);
110 }
111
112 std::string GetResult() {
113 return std::move(shader_source);
114 }
115
116 s32 scope = 0;
117
118private:
119 void AppendIndentation() {
120 shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
121 }
122
123 std::string shader_source;
124 u32 temporary_index = 1;
125};
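// Illustrative sketch (not part of the original file): minimal usage of the ShaderWriter above.
// Literal braces must be escaped as {{ and }} because AddLine routes the text through fmt.
inline std::string ExampleShaderWriterUsage() {
    ShaderWriter code;
    code.AddLine("void main() {{"); // emits "void main() {"
    ++code.scope;
    code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); // indented by 4 spaces
    --code.scope;
    code.AddLine("}}"); // emits "}"
    return code.GetResult();
}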
126
127class Expression final {
128public:
129 Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
130 ASSERT(type != Type::Void);
131 }
132 Expression() : type{Type::Void} {}
133
134 Type GetType() const {
135 return type;
136 }
137
138 std::string GetCode() const {
139 return code;
140 }
141
142 void CheckVoid() const {
143 ASSERT(type == Type::Void);
144 }
145
146 std::string As(Type type_) const {
147 switch (type_) {
148 case Type::Bool:
149 return AsBool();
150 case Type::Bool2:
151 return AsBool2();
152 case Type::Float:
153 return AsFloat();
154 case Type::Int:
155 return AsInt();
156 case Type::Uint:
157 return AsUint();
158 case Type::HalfFloat:
159 return AsHalfFloat();
160 default:
161 UNREACHABLE_MSG("Invalid type");
162 return code;
163 }
164 }
165
166 std::string AsBool() const {
167 switch (type) {
168 case Type::Bool:
169 return code;
170 default:
171 UNREACHABLE_MSG("Incompatible types");
172 return code;
173 }
174 }
175
176 std::string AsBool2() const {
177 switch (type) {
178 case Type::Bool2:
179 return code;
180 default:
181 UNREACHABLE_MSG("Incompatible types");
182 return code;
183 }
184 }
185
186 std::string AsFloat() const {
187 switch (type) {
188 case Type::Float:
189 return code;
190 case Type::Uint:
191 return fmt::format("utof({})", code);
192 case Type::Int:
193 return fmt::format("itof({})", code);
194 case Type::HalfFloat:
195 return fmt::format("utof(packHalf2x16({}))", code);
196 default:
197 UNREACHABLE_MSG("Incompatible types");
198 return code;
199 }
200 }
201
202 std::string AsInt() const {
203 switch (type) {
204 case Type::Float:
205 return fmt::format("ftoi({})", code);
206 case Type::Uint:
207 return fmt::format("int({})", code);
208 case Type::Int:
209 return code;
210 case Type::HalfFloat:
211 return fmt::format("int(packHalf2x16({}))", code);
212 default:
213 UNREACHABLE_MSG("Incompatible types");
214 return code;
215 }
216 }
217
218 std::string AsUint() const {
219 switch (type) {
220 case Type::Float:
221 return fmt::format("ftou({})", code);
222 case Type::Uint:
223 return code;
224 case Type::Int:
225 return fmt::format("uint({})", code);
226 case Type::HalfFloat:
227 return fmt::format("packHalf2x16({})", code);
228 default:
229 UNREACHABLE_MSG("Incompatible types");
230 return code;
231 }
232 }
233
234 std::string AsHalfFloat() const {
235 switch (type) {
236 case Type::Float:
237 return fmt::format("unpackHalf2x16(ftou({}))", code);
238 case Type::Uint:
239 return fmt::format("unpackHalf2x16({})", code);
240 case Type::Int:
241 return fmt::format("unpackHalf2x16(int({}))", code);
242 case Type::HalfFloat:
243 return code;
244 default:
245 UNREACHABLE_MSG("Incompatible types");
246 return code;
247 }
248 }
249
250private:
251 std::string code;
252 Type type{};
253};
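// Illustrative sketch (not part of the original file): strings produced by the conversions above,
// using the ftou/utof/packHalf2x16 helpers declared in COMMON_DECLARATIONS.
//   Expression{"r0", Type::Float}.AsUint()       -> "ftou(r0)"
//   Expression{"0x10U", Type::Uint}.AsFloat()    -> "utof(0x10U)"
//   Expression{"pair", Type::HalfFloat}.AsUint() -> "packHalf2x16(pair)"
//   Expression{"r0", Type::Float}.AsFloat()      -> "r0" (already the requested type)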
254
255const char* GetTypeString(Type type) {
256 switch (type) {
257 case Type::Bool:
258 return "bool";
259 case Type::Bool2:
260 return "bvec2";
261 case Type::Float:
262 return "float";
263 case Type::Int:
264 return "int";
265 case Type::Uint:
266 return "uint";
267 case Type::HalfFloat:
268 return "vec2";
269 default:
270 UNREACHABLE_MSG("Invalid type");
271 return "<invalid type>";
272 }
273}
274
275const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
276 switch (image_type) {
277 case Tegra::Shader::ImageType::Texture1D:
278 return "1D";
279 case Tegra::Shader::ImageType::TextureBuffer:
280 return "Buffer";
281 case Tegra::Shader::ImageType::Texture1DArray:
282 return "1DArray";
283 case Tegra::Shader::ImageType::Texture2D:
284 return "2D";
285 case Tegra::Shader::ImageType::Texture2DArray:
286 return "2DArray";
287 case Tegra::Shader::ImageType::Texture3D:
288 return "3D";
289 default:
290 UNREACHABLE();
291 return "1D";
292 }
293}
294
295/// Describes primitive behavior on geometry shaders
296std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
297 switch (topology) {
298 case Maxwell::PrimitiveTopology::Points:
299 return {"points", 1};
300 case Maxwell::PrimitiveTopology::Lines:
301 case Maxwell::PrimitiveTopology::LineStrip:
302 return {"lines", 2};
303 case Maxwell::PrimitiveTopology::LinesAdjacency:
304 case Maxwell::PrimitiveTopology::LineStripAdjacency:
305 return {"lines_adjacency", 4};
306 case Maxwell::PrimitiveTopology::Triangles:
307 case Maxwell::PrimitiveTopology::TriangleStrip:
308 case Maxwell::PrimitiveTopology::TriangleFan:
309 return {"triangles", 3};
310 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
311 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
312 return {"triangles_adjacency", 6};
313 default:
314 UNIMPLEMENTED_MSG("topology={}", topology);
315 return {"points", 1};
316 }
317}
318
319/// Generates code to use for a swizzle operation.
320constexpr const char* GetSwizzle(std::size_t element) {
321 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
322 return swizzle.at(element);
323}
324
325constexpr const char* GetColorSwizzle(std::size_t element) {
326 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
327 return swizzle.at(element);
328}
329
330/// Translate topology
331std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
332 switch (topology) {
333 case Tegra::Shader::OutputTopology::PointList:
334 return "points";
335 case Tegra::Shader::OutputTopology::LineStrip:
336 return "line_strip";
337 case Tegra::Shader::OutputTopology::TriangleStrip:
338 return "triangle_strip";
339 default:
340 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
341 return "points";
342 }
343}
344
345/// Returns true if an object has to be treated as precise
346bool IsPrecise(Operation operand) {
347 const auto& meta{operand.GetMeta()};
348 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
349 return arithmetic->precise;
350 }
351 return false;
352}
353
354bool IsPrecise(const Node& node) {
355 if (const auto operation = std::get_if<OperationNode>(&*node)) {
356 return IsPrecise(*operation);
357 }
358 return false;
359}
360
361constexpr bool IsGenericAttribute(Attribute::Index index) {
362 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
363}
364
365constexpr bool IsLegacyTexCoord(Attribute::Index index) {
366 return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
367 static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
368}
369
370constexpr Attribute::Index ToGenericAttribute(u64 value) {
371 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
372}
373
374constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
375 return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
376}
377
378u32 GetGenericAttributeIndex(Attribute::Index index) {
379 ASSERT(IsGenericAttribute(index));
380 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
381}
382
383constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
384 switch (stack) {
385 case MetaStackClass::Ssy:
386 return "ssy";
387 case MetaStackClass::Pbk:
388 return "pbk";
389 }
390 return {};
391}
392
393std::string FlowStackName(MetaStackClass stack) {
394 return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
395}
396
397std::string FlowStackTopName(MetaStackClass stack) {
398 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
399}
400
401struct GenericVaryingDescription {
402 std::string name;
403 u8 first_element = 0;
404 bool is_scalar = false;
405};
406
407class GLSLDecompiler final {
408public:
409 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
410 ShaderType stage_, std::string_view identifier_,
411 std::string_view suffix_)
412 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
413 identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
414 if (stage != ShaderType::Compute) {
415 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
416 }
417 }
418
419 void Decompile() {
420 DeclareHeader();
421 DeclareVertex();
422 DeclareGeometry();
423 DeclareFragment();
424 DeclareCompute();
425 DeclareInputAttributes();
426 DeclareOutputAttributes();
427 DeclareImages();
428 DeclareSamplers();
429 DeclareGlobalMemory();
430 DeclareConstantBuffers();
431 DeclareLocalMemory();
432 DeclareRegisters();
433 DeclarePredicates();
434 DeclareInternalFlags();
435 DeclareCustomVariables();
436 DeclarePhysicalAttributeReader();
437
438 code.AddLine("void main() {{");
439 ++code.scope;
440
441 if (stage == ShaderType::Vertex) {
442 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
443 }
444
445 if (ir.IsDecompiled()) {
446 DecompileAST();
447 } else {
448 DecompileBranchMode();
449 }
450
451 --code.scope;
452 code.AddLine("}}");
453 }
454
455 std::string GetResult() {
456 return code.GetResult();
457 }
458
459private:
460 friend class ASTDecompiler;
461 friend class ExprDecompiler;
462
463 void DecompileBranchMode() {
464 // VM's program counter
465 const auto first_address = ir.GetBasicBlocks().begin()->first;
466 code.AddLine("uint jmp_to = {}U;", first_address);
467
468 // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems
469 // unlikely that shaders will use 20 nested SSYs and PBKs.
470 constexpr u32 FLOW_STACK_SIZE = 20;
471 if (!ir.IsFlowStackDisabled()) {
472 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
473 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
474 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
475 }
476 }
477
478 code.AddLine("while (true) {{");
479 ++code.scope;
480
481 code.AddLine("switch (jmp_to) {{");
482
483 for (const auto& pair : ir.GetBasicBlocks()) {
484 const auto& [address, bb] = pair;
485 code.AddLine("case 0x{:X}U: {{", address);
486 ++code.scope;
487
488 VisitBlock(bb);
489
490 --code.scope;
491 code.AddLine("}}");
492 }
493
494 code.AddLine("default: return;");
495 code.AddLine("}}");
496
497 --code.scope;
498 code.AddLine("}}");
499 }
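    // Illustrative sketch (not part of the original file): the GLSL skeleton emitted by
    // DecompileBranchMode above. The block address is an example, and the SSY/PBK stacks are
    // only declared when the flow stack is not disabled.
    static constexpr std::string_view BRANCH_MODE_SKELETON_EXAMPLE = R"(uint jmp_to = 0xA0U;
uint ssy_flow_stack[20];
uint ssy_flow_stack_top = 0U;
uint pbk_flow_stack[20];
uint pbk_flow_stack_top = 0U;
while (true) {
    switch (jmp_to) {
    case 0xA0U: {
        // ... decompiled block body; branches assign jmp_to and break ...
    }
    default: return;
    }
}
)";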
500
501 void DecompileAST();
502
503 void DeclareHeader() {
504 if (!identifier.empty()) {
505 code.AddLine("// {}", identifier);
506 }
507 const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
508 code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
509 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
510 if (device.HasShaderBallot()) {
511 code.AddLine("#extension GL_ARB_shader_ballot : require");
512 }
513 if (device.HasVertexViewportLayer()) {
514 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
515 }
516 if (device.HasImageLoadFormatted()) {
517 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
518 }
519 if (device.HasTextureShadowLod()) {
520 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
521 }
522 if (device.HasWarpIntrinsics()) {
523 code.AddLine("#extension GL_NV_gpu_shader5 : require");
524 code.AddLine("#extension GL_NV_shader_thread_group : require");
525 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
526 }
527 // This pragma stops Nvidia's driver from over-optimizing math (probably with fp16
528 // operations) in places where we don't want it to.
529 // Thanks to Ryujinx for finding this workaround.
530 code.AddLine("#pragma optionNV(fastmath off)");
531
532 code.AddNewLine();
533
534 code.AddLine(COMMON_DECLARATIONS);
535 }
536
537 void DeclareVertex() {
538 if (stage != ShaderType::Vertex) {
539 return;
540 }
541
542 DeclareVertexRedeclarations();
543 }
544
545 void DeclareGeometry() {
546 if (stage != ShaderType::Geometry) {
547 return;
548 }
549
550 const auto& info = registry.GetGraphicsInfo();
551 const auto input_topology = info.primitive_topology;
552 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
553 max_input_vertices = max_vertices;
554 code.AddLine("layout ({}) in;", glsl_topology);
555
556 const auto topology = GetTopologyName(header.common3.output_topology);
557 const auto max_output_vertices = header.common4.max_output_vertices.Value();
558 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
559 code.AddNewLine();
560
561 code.AddLine("in gl_PerVertex {{");
562 ++code.scope;
563 code.AddLine("vec4 gl_Position;");
564 --code.scope;
565 code.AddLine("}} gl_in[];");
566
567 DeclareVertexRedeclarations();
568 }
569
570 void DeclareFragment() {
571 if (stage != ShaderType::Fragment) {
572 return;
573 }
574 if (ir.UsesLegacyVaryings()) {
575 code.AddLine("in gl_PerFragment {{");
576 ++code.scope;
577 code.AddLine("vec4 gl_TexCoord[8];");
578 code.AddLine("vec4 gl_Color;");
579 code.AddLine("vec4 gl_SecondaryColor;");
580 --code.scope;
581 code.AddLine("}};");
582 }
583
584 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
585 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
586 }
587 }
588
589 void DeclareCompute() {
590 if (stage != ShaderType::Compute) {
591 return;
592 }
593 const auto& info = registry.GetComputeInfo();
594 if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
595 const u32 limit = device.GetMaxComputeSharedMemorySize();
596 if (size > limit) {
597 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
598 size, limit);
599 size = limit;
600 }
601
602 code.AddLine("shared uint smem[{}];", size / 4);
603 code.AddNewLine();
604 }
605 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
606 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
607 code.AddNewLine();
608 }
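    // Illustrative sketch (not part of the original file): with a guest request of 24576 words
    // (96 KiB) and an example host limit of 48 KiB, the clamp above yields
    //     size = min(24576 * 4, 49152) = 49152 bytes  ->  "shared uint smem[12288];"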
609
610 void DeclareVertexRedeclarations() {
611 code.AddLine("out gl_PerVertex {{");
612 ++code.scope;
613
614 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
615 if (!pos_xfb.empty()) {
616 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
617 }
618 const char* pos_type =
619 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
620 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
621
622 for (const auto attribute : ir.GetOutputAttributes()) {
623 if (attribute == Attribute::Index::ClipDistances0123 ||
624 attribute == Attribute::Index::ClipDistances4567) {
625 code.AddLine("float gl_ClipDistance[];");
626 break;
627 }
628 }
629
630 if (stage != ShaderType::Geometry &&
631 (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
632 if (ir.UsesLayer()) {
633 code.AddLine("int gl_Layer;");
634 }
635 if (ir.UsesViewportIndex()) {
636 code.AddLine("int gl_ViewportIndex;");
637 }
638 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
639 !device.HasVertexViewportLayer()) {
640 LOG_ERROR(
641 Render_OpenGL,
642 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
643 }
644
645 if (ir.UsesPointSize()) {
646 code.AddLine("float gl_PointSize;");
647 }
648
649 if (ir.UsesLegacyVaryings()) {
650 code.AddLine("vec4 gl_TexCoord[8];");
651 code.AddLine("vec4 gl_FrontColor;");
652 code.AddLine("vec4 gl_FrontSecondaryColor;");
653 code.AddLine("vec4 gl_BackColor;");
654 code.AddLine("vec4 gl_BackSecondaryColor;");
655 }
656
657 --code.scope;
658 code.AddLine("}};");
659 code.AddNewLine();
660
661 if (stage == ShaderType::Geometry) {
662 if (ir.UsesLayer()) {
663 code.AddLine("out int gl_Layer;");
664 }
665 if (ir.UsesViewportIndex()) {
666 code.AddLine("out int gl_ViewportIndex;");
667 }
668 }
669 code.AddNewLine();
670 }
671
672 void DeclareRegisters() {
673 const auto& registers = ir.GetRegisters();
674 for (const u32 gpr : registers) {
675 code.AddLine("float {} = 0.0f;", GetRegister(gpr));
676 }
677 if (!registers.empty()) {
678 code.AddNewLine();
679 }
680 }
681
682 void DeclareCustomVariables() {
683 const u32 num_custom_variables = ir.GetNumCustomVariables();
684 for (u32 i = 0; i < num_custom_variables; ++i) {
685 code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
686 }
687 if (num_custom_variables > 0) {
688 code.AddNewLine();
689 }
690 }
691
692 void DeclarePredicates() {
693 const auto& predicates = ir.GetPredicates();
694 for (const auto pred : predicates) {
695 code.AddLine("bool {} = false;", GetPredicate(pred));
696 }
697 if (!predicates.empty()) {
698 code.AddNewLine();
699 }
700 }
701
702 void DeclareLocalMemory() {
703 u64 local_memory_size = 0;
704 if (stage == ShaderType::Compute) {
705 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
706 } else {
707 local_memory_size = header.GetLocalMemorySize();
708 }
709 if (local_memory_size == 0) {
710 return;
711 }
712 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
713 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
714 code.AddNewLine();
715 }
716
717 void DeclareInternalFlags() {
718 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
719 const auto flag_code = static_cast<InternalFlag>(flag);
720 code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
721 }
722 code.AddNewLine();
723 }
724
725 const char* GetInputFlags(PixelImap attribute) {
726 switch (attribute) {
727 case PixelImap::Perspective:
728 return "smooth";
729 case PixelImap::Constant:
730 return "flat";
731 case PixelImap::ScreenLinear:
732 return "noperspective";
733 case PixelImap::Unused:
734 break;
735 }
736 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
737 return {};
738 }
739
740 void DeclareInputAttributes() {
741 if (ir.HasPhysicalAttributes()) {
742 const u32 num_inputs{GetNumPhysicalInputAttributes()};
743 for (u32 i = 0; i < num_inputs; ++i) {
744 DeclareInputAttribute(ToGenericAttribute(i), true);
745 }
746 code.AddNewLine();
747 return;
748 }
749
750 const auto& attributes = ir.GetInputAttributes();
751 for (const auto index : attributes) {
752 if (IsGenericAttribute(index)) {
753 DeclareInputAttribute(index, false);
754 }
755 }
756 if (!attributes.empty()) {
757 code.AddNewLine();
758 }
759 }
760
761 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
762 const u32 location{GetGenericAttributeIndex(index)};
763
764 std::string name{GetGenericInputAttribute(index)};
765 if (stage == ShaderType::Geometry) {
766 name = "gs_" + name + "[]";
767 }
768
769 std::string suffix_;
770 if (stage == ShaderType::Fragment) {
771 const auto input_mode{header.ps.GetPixelImap(location)};
772 if (input_mode == PixelImap::Unused) {
773 return;
774 }
775 suffix_ = GetInputFlags(input_mode);
776 }
777
778 code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
779 }
780
781 void DeclareOutputAttributes() {
782 if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
783 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
784 DeclareOutputAttribute(ToGenericAttribute(i));
785 }
786 code.AddNewLine();
787 return;
788 }
789
790 const auto& attributes = ir.GetOutputAttributes();
791 for (const auto index : attributes) {
792 if (IsGenericAttribute(index)) {
793 DeclareOutputAttribute(index);
794 }
795 }
796 if (!attributes.empty()) {
797 code.AddNewLine();
798 }
799 }
800
801 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
802 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
803 const auto it = transform_feedback.find(location);
804 if (it == transform_feedback.end()) {
805 return std::nullopt;
806 }
807 return it->second.components;
808 }
809
810 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
811 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
812 const auto it = transform_feedback.find(location);
813 if (it == transform_feedback.end()) {
814 return {};
815 }
816
817 const VaryingTFB& tfb = it->second;
818 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
819 tfb.offset, tfb.stride);
820 }
821
822 void DeclareOutputAttribute(Attribute::Index index) {
823 static constexpr std::string_view swizzle = "xyzw";
824 u8 element = 0;
825 while (element < 4) {
826 auto xfb = GetTransformFeedbackDecoration(index, element);
827 if (!xfb.empty()) {
828 xfb = fmt::format(", {}", xfb);
829 }
830 const std::size_t remainder = 4 - element;
831 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
832 const char* const type = FLOAT_TYPES.at(num_components - 1);
833
834 const u32 location = GetGenericAttributeIndex(index);
835
836 GenericVaryingDescription description;
837 description.first_element = static_cast<u8>(element);
838 description.is_scalar = num_components == 1;
839 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
840 if (element != 0 || num_components != 4) {
841 const std::string_view name_swizzle = swizzle.substr(element, num_components);
842 description.name = fmt::format("{}_{}", description.name, name_swizzle);
843 }
844 for (std::size_t i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(location * 4 + element + i);
846 varying_description.insert({offset, description});
847 }
848
849 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
850 xfb, type, description.name);
851
852 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
853 }
854 }
855
856 void DeclareConstantBuffers() {
857 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
858 for (const auto& [index, info] : ir.GetConstantBuffers()) {
859 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
860 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
861 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
862 GetConstBufferBlock(index));
863 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
864 code.AddLine("}};");
865 code.AddNewLine();
866 }
867 }
868
869 void DeclareGlobalMemory() {
870 u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
871 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
872 // Since we don't know how the shader will use this memory, hint the driver to disable as
873 // many optimizations as possible
874 std::string qualifier = "coherent volatile";
875 if (usage.is_read && !usage.is_written) {
876 qualifier += " readonly";
877 } else if (usage.is_written && !usage.is_read) {
878 qualifier += " writeonly";
879 }
880
881 code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
882 GetGlobalMemoryBlock(base));
883 code.AddLine(" uint {}[];", GetGlobalMemory(base));
884 code.AddLine("}};");
885 code.AddNewLine();
886 }
887 }
888
889 void DeclareSamplers() {
890 u32 binding = device.GetBaseBindings(stage).sampler;
891 for (const auto& sampler : ir.GetSamplers()) {
892 const std::string name = GetSampler(sampler);
893 const std::string description = fmt::format("layout (binding = {}) uniform", binding);
894 binding += sampler.is_indexed ? sampler.size : 1;
895
896 std::string sampler_type = [&]() {
897 if (sampler.is_buffer) {
898 return "samplerBuffer";
899 }
900 switch (sampler.type) {
901 case TextureType::Texture1D:
902 return "sampler1D";
903 case TextureType::Texture2D:
904 return "sampler2D";
905 case TextureType::Texture3D:
906 return "sampler3D";
907 case TextureType::TextureCube:
908 return "samplerCube";
909 default:
910 UNREACHABLE();
911 return "sampler2D";
912 }
913 }();
914 if (sampler.is_array) {
915 sampler_type += "Array";
916 }
917 if (sampler.is_shadow) {
918 sampler_type += "Shadow";
919 }
920
921 if (!sampler.is_indexed) {
922 code.AddLine("{} {} {};", description, sampler_type, name);
923 } else {
924 code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
925 }
926 }
927 if (!ir.GetSamplers().empty()) {
928 code.AddNewLine();
929 }
930 }
931
932 void DeclarePhysicalAttributeReader() {
933 if (!ir.HasPhysicalAttributes()) {
934 return;
935 }
936 code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
937 ++code.scope;
938 code.AddLine("switch (physical_address) {{");
939
940 // Just declare generic attributes for now.
941 const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
942 for (u32 index = 0; index < num_attributes; ++index) {
943 const auto attribute{ToGenericAttribute(index)};
944 for (u32 element = 0; element < 4; ++element) {
945 constexpr u32 generic_base = 0x80;
946 constexpr u32 generic_stride = 16;
947 constexpr u32 element_stride = 4;
948 const u32 address{generic_base + index * generic_stride + element * element_stride};
949
950 const bool declared = stage != ShaderType::Fragment ||
951 header.ps.GetPixelImap(index) != PixelImap::Unused;
952 const std::string value =
953 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
954 code.AddLine("case 0x{:X}U: return {};", address, value);
955 }
956 }
957
958 code.AddLine("default: return 0;");
959
960 code.AddLine("}}");
961 --code.scope;
962 code.AddLine("}}");
963 code.AddNewLine();
964 }
965
966 void DeclareImages() {
967 u32 binding = device.GetBaseBindings(stage).image;
968 for (const auto& image : ir.GetImages()) {
969 std::string qualifier = "coherent volatile";
970 if (image.is_read && !image.is_written) {
971 qualifier += " readonly";
972 } else if (image.is_written && !image.is_read) {
973 qualifier += " writeonly";
974 }
975
976 const char* format = image.is_atomic ? "r32ui, " : "";
977 const char* type_declaration = GetImageTypeDeclaration(image.type);
978 code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
979 qualifier, type_declaration, GetImage(image));
980 }
981 if (!ir.GetImages().empty()) {
982 code.AddNewLine();
983 }
984 }
985
986 void VisitBlock(const NodeBlock& bb) {
987 for (const auto& node : bb) {
988 Visit(node).CheckVoid();
989 }
990 }
991
992 Expression Visit(const Node& node) {
993 if (const auto operation = std::get_if<OperationNode>(&*node)) {
994 if (const auto amend_index = operation->GetAmendIndex()) {
995 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
996 }
997 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
998 if (operation_index >= operation_decompilers.size()) {
999 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
1000 return {};
1001 }
1002 const auto decompiler = operation_decompilers[operation_index];
1003 if (decompiler == nullptr) {
1004 UNREACHABLE_MSG("Undefined operation: {}", operation_index);
1005 return {};
1006 }
1007 return (this->*decompiler)(*operation);
1008 }
1009
1010 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1011 const u32 index = gpr->GetIndex();
1012 if (index == Register::ZeroIndex) {
1013 return {"0U", Type::Uint};
1014 }
1015 return {GetRegister(index), Type::Float};
1016 }
1017
1018 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1019 const u32 index = cv->GetIndex();
1020 return {GetCustomVariable(index), Type::Float};
1021 }
1022
1023 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1024 const u32 value = immediate->GetValue();
1025 if (value < 10) {
1026 // For readability, avoid using hex for single-digit numbers
1027 return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
1028 }
1029 return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
1030 }
1031
1032 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1033 const auto value = [&]() -> std::string {
1034 switch (const auto index = predicate->GetIndex(); index) {
1035 case Tegra::Shader::Pred::UnusedIndex:
1036 return "true";
1037 case Tegra::Shader::Pred::NeverExecute:
1038 return "false";
1039 default:
1040 return GetPredicate(index);
1041 }
1042 }();
1043 if (predicate->IsNegated()) {
1044 return {fmt::format("!({})", value), Type::Bool};
1045 }
1046 return {value, Type::Bool};
1047 }
1048
1049 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1050 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
1051 "Physical attributes in geometry shaders are not implemented");
1052 if (abuf->IsPhysicalBuffer()) {
1053 return {fmt::format("ReadPhysicalAttribute({})",
1054 Visit(abuf->GetPhysicalAddress()).AsUint()),
1055 Type::Float};
1056 }
1057 return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
1058 }
1059
1060 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1061 const Node offset = cbuf->GetOffset();
1062
1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1064 // Direct access
1065 const u32 offset_imm = immediate->GetValue();
1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1067 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1068 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1069 Type::Uint};
1070 }
1071
1072 // Indirect access
1073 const std::string final_offset = code.GenerateTemporary();
1074 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1075
1076 if (!device.HasComponentIndexingBug()) {
1077 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
1078 final_offset, final_offset),
1079 Type::Uint};
1080 }
1081
1082 // AMD's proprietary GLSL compiler emits broken code for variable component access.
1083 // To bypass this driver bug, generate 4 ifs, one per component.
1084 const std::string pack = code.GenerateTemporary();
1085 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
1086 final_offset);
1087
1088 const std::string result = code.GenerateTemporary();
1089 code.AddLine("uint {};", result);
1090 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1091 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
1092 GetSwizzle(swizzle));
1093 }
1094 return {result, Type::Uint};
1095 }
1096
1097 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1098 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1099 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
1100 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1101 return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1102 Type::Uint};
1103 }
1104
1105 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1106 return {
1107 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1108 Type::Uint};
1109 }
1110
1111 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1112 return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1113 }
1114
1115 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1116 return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
1117 }
1118
1119 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1120 if (const auto amend_index = conditional->GetAmendIndex()) {
1121 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
1122 }
1123 // It's invalid to call conditional on nested nodes, use an operation instead
1124 code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
1125 ++code.scope;
1126
1127 VisitBlock(conditional->GetCode());
1128
1129 --code.scope;
1130 code.AddLine("}}");
1131 return {};
1132 }
1133
1134 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1135 code.AddLine("// " + comment->GetText());
1136 return {};
1137 }
1138
1139 UNREACHABLE();
1140 return {};
1141 }
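    // Illustrative sketch (not part of the original file): roughly the GLSL emitted by the
    // indirect constant-buffer path above when HasComponentIndexingBug() is true (AMD's
    // proprietary compiler). Buffer and temporary names are examples only.
    static constexpr std::string_view AMD_CBUF_WORKAROUND_EXAMPLE = R"(uint tmp1 = offset_expr >> 2;
uvec4 tmp2 = cbuf0[tmp1 >> 2];
uint tmp3;
if ((tmp1 & 3) == 0) tmp3 = tmp2.x;
if ((tmp1 & 3) == 1) tmp3 = tmp2.y;
if ((tmp1 & 3) == 2) tmp3 = tmp2.z;
if ((tmp1 & 3) == 3) tmp3 = tmp2.w;
)";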
1142
1143 Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
1144 const auto GeometryPass = [&](std::string_view name) {
1145 if (stage == ShaderType::Geometry && buffer) {
1146 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
1147 // set a 0x80000000 index for those and the shader fails to build. Find out why
1148 // this happens and what its intent is.
1149 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1150 max_input_vertices.value());
1151 }
1152 return std::string(name);
1153 };
1154
1155 switch (attribute) {
1156 case Attribute::Index::Position:
1157 switch (stage) {
1158 case ShaderType::Geometry:
1159 return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
1160 GetSwizzle(element)),
1161 Type::Float};
1162 case ShaderType::Fragment:
1163 return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
1164 default:
1165 UNREACHABLE();
1166 return {"0", Type::Int};
1167 }
1168 case Attribute::Index::FrontColor:
1169 return {"gl_Color"s + GetSwizzle(element), Type::Float};
1170 case Attribute::Index::FrontSecondaryColor:
1171 return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
1172 case Attribute::Index::PointCoord:
1173 switch (element) {
1174 case 0:
1175 return {"gl_PointCoord.x", Type::Float};
1176 case 1:
1177 return {"gl_PointCoord.y", Type::Float};
1178 case 2:
1179 case 3:
1180 return {"0.0f", Type::Float};
1181 }
1182 UNREACHABLE();
1183 return {"0", Type::Int};
1184 case Attribute::Index::TessCoordInstanceIDVertexID:
1185 // TODO(Subv): Find out what the values are for the first two elements when inside a
1186 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
1187 // shader.
1188 ASSERT(stage == ShaderType::Vertex);
1189 switch (element) {
1190 case 2:
1191 // Config pack's first value is instance_id.
1192 return {"gl_InstanceID", Type::Int};
1193 case 3:
1194 return {"gl_VertexID", Type::Int};
1195 }
1196 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1197 return {"0", Type::Int};
1198 case Attribute::Index::FrontFacing:
1199 // TODO(Subv): Find out what the values are for the other elements.
1200 ASSERT(stage == ShaderType::Fragment);
1201 switch (element) {
1202 case 3:
1203 return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
1204 }
1205 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1206 return {"0", Type::Int};
1207 default:
1208 if (IsGenericAttribute(attribute)) {
1209 return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
1210 Type::Float};
1211 }
1212 if (IsLegacyTexCoord(attribute)) {
1213 UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
1214 return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1215 GetSwizzle(element)),
1216 Type::Float};
1217 }
1218 break;
1219 }
1220 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1221 return {"0", Type::Int};
1222 }
1223
1224 Expression ApplyPrecise(Operation operation, std::string value, Type type) {
1225 if (!IsPrecise(operation)) {
1226 return {std::move(value), type};
1227 }
1228 // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
1229 // be found in fragment shaders, so we disable precise there. There are vertex shaders that
1230 // also fail to build but nobody seems to care about those.
1231 // Note: Only bugged drivers will skip precise.
1232 const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
1233
1234 std::string temporary = code.GenerateTemporary();
1235 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
1236 temporary, value);
1237 return {std::move(temporary), type};
1238 }
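    // Illustrative sketch (not part of the original file): for a precise float addition, the
    // line emitted above looks like
    //     precise float tmp1 = (r5 + r6);
    // and on drivers with HasPreciseBug(), in fragment shaders, the "precise " prefix is dropped:
    //     float tmp1 = (r5 + r6);
    // (temporary and register names are examples only).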
1239
1240 Expression VisitOperand(Operation operation, std::size_t operand_index) {
1241 const auto& operand = operation[operand_index];
1242 const bool parent_precise = IsPrecise(operation);
1243 const bool child_precise = IsPrecise(operand);
1244 const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
1245 if (!parent_precise || child_precise || child_trivial) {
1246 return Visit(operand);
1247 }
1248
1249 Expression value = Visit(operand);
1250 std::string temporary = code.GenerateTemporary();
1251 code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
1252 return {std::move(temporary), value.GetType()};
1253 }
1254
1255 std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
1256 const u32 element = abuf->GetElement();
1257 switch (const auto attribute = abuf->GetIndex()) {
1258 case Attribute::Index::Position:
1259 return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
1260 case Attribute::Index::LayerViewportPointSize:
1261 switch (element) {
1262 case 0:
1263 UNIMPLEMENTED();
1264 return std::nullopt;
1265 case 1:
1266 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1267 return std::nullopt;
1268 }
1269 return {{"gl_Layer", Type::Int}};
1270 case 2:
1271 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1272 return std::nullopt;
1273 }
1274 return {{"gl_ViewportIndex", Type::Int}};
1275 case 3:
1276 return {{"gl_PointSize", Type::Float}};
1277 }
1278 return std::nullopt;
1279 case Attribute::Index::FrontColor:
1280 return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
1281 case Attribute::Index::FrontSecondaryColor:
1282 return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
1283 case Attribute::Index::BackColor:
1284 return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
1285 case Attribute::Index::BackSecondaryColor:
1286 return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
1287 case Attribute::Index::ClipDistances0123:
1288 return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
1289 case Attribute::Index::ClipDistances4567:
1290 return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
1291 default:
1292 if (IsGenericAttribute(attribute)) {
1293 return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
1294 }
1295 if (IsLegacyTexCoord(attribute)) {
1296 return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1297 GetSwizzle(element)),
1298 Type::Float}};
1299 }
1300 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
1301 return std::nullopt;
1302 }
1303 }
1304
1305 Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
1306 Type type_a) {
1307 std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
1308 return ApplyPrecise(operation, std::move(op_str), result_type);
1309 }
1310
1311 Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
1312 Type type_a, Type type_b) {
1313 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1314 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1315 std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
1316
1317 return ApplyPrecise(operation, std::move(op_str), result_type);
1318 }
1319
1320 Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
1321 Type type_a, Type type_b) {
1322 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1323 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1324 std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
1325
1326 return ApplyPrecise(operation, std::move(op_str), result_type);
1327 }
1328
1329 Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
1330 Type type_a, Type type_b, Type type_c) {
1331 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1332 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1333 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1334 std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
1335
1336 return ApplyPrecise(operation, std::move(op_str), result_type);
1337 }
1338
1339 Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
1340 Type type_a, Type type_b, Type type_c, Type type_d) {
1341 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1342 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1343 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1344 const std::string op_d = VisitOperand(operation, 3).As(type_d);
1345 std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
1346
1347 return ApplyPrecise(operation, std::move(op_str), result_type);
1348 }
1349
1350 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
1351 const std::vector<TextureIR>& extras, bool separate_dc = false) {
1352 constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
1353
1354 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1355 ASSERT(meta);
1356
1357 const std::size_t count = operation.GetOperandsCount();
1358 const bool has_array = meta->sampler.is_array;
1359 const bool has_shadow = meta->sampler.is_shadow;
1360 const bool workaround_lod_array_shadow_as_grad =
1361 !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
1362 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
1363 meta->sampler.type == TextureType::TextureCube);
1364
1365 std::string expr = "texture";
1366
1367 if (workaround_lod_array_shadow_as_grad) {
1368 expr += "Grad";
1369 } else {
1370 expr += function_suffix;
1371 }
1372
1373 if (!meta->aoffi.empty()) {
1374 expr += "Offset";
1375 } else if (!meta->ptp.empty()) {
1376 expr += "Offsets";
1377 }
1378 if (!meta->sampler.is_indexed) {
1379 expr += '(' + GetSampler(meta->sampler) + ", ";
1380 } else {
1381 expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
1382 }
1383 expr += coord_constructors.at(count + (has_array ? 1 : 0) +
1384 (has_shadow && !separate_dc ? 1 : 0) - 1);
1385 expr += '(';
1386 for (std::size_t i = 0; i < count; ++i) {
1387 expr += Visit(operation[i]).AsFloat();
1388
1389 const std::size_t next = i + 1;
1390 if (next < count)
1391 expr += ", ";
1392 }
1393 if (has_array) {
1394 expr += ", float(" + Visit(meta->array).AsInt() + ')';
1395 }
1396 if (has_shadow) {
1397 if (separate_dc) {
1398 expr += "), " + Visit(meta->depth_compare).AsFloat();
1399 } else {
1400 expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
1401 }
1402 } else {
1403 expr += ')';
1404 }
1405
1406 if (workaround_lod_array_shadow_as_grad) {
1407 switch (meta->sampler.type) {
1408 case TextureType::Texture2D:
1409 return expr + ", vec2(0.0), vec2(0.0))";
1410 case TextureType::TextureCube:
1411 return expr + ", vec3(0.0), vec3(0.0))";
1412 default:
1413 UNREACHABLE();
1414 break;
1415 }
1416 }
1417
1418 for (const auto& variant : extras) {
1419 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1420 expr += GenerateTextureArgument(*argument);
1421 } else if (std::holds_alternative<TextureOffset>(variant)) {
1422 if (!meta->aoffi.empty()) {
1423 expr += GenerateTextureAoffi(meta->aoffi);
1424 } else if (!meta->ptp.empty()) {
1425 expr += GenerateTexturePtp(meta->ptp);
1426 }
1427 } else if (std::holds_alternative<TextureDerivates>(variant)) {
1428 expr += GenerateTextureDerivates(meta->derivates);
1429 } else {
1430 UNREACHABLE();
1431 }
1432 }
1433
1434 return expr + ')';
1435 }
1436
1437 std::string GenerateTextureArgument(const TextureArgument& argument) {
1438 const auto& [type, operand] = argument;
1439 if (operand == nullptr) {
1440 return {};
1441 }
1442
1443 std::string expr = ", ";
1444 switch (type) {
1445 case Type::Int:
1446 if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
1447 // Inline the value as an immediate integer in GLSL (some extra arguments are
1448 // required to be constant)
1449 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
1450 } else {
1451 expr += Visit(operand).AsInt();
1452 }
1453 break;
1454 case Type::Float:
1455 expr += Visit(operand).AsFloat();
1456 break;
1457 default: {
1458 const auto type_int = static_cast<u32>(type);
1459 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
1460 expr += '0';
1461 break;
1462 }
1463 }
1464 return expr;
1465 }
1466
1467 std::string ReadTextureOffset(const Node& value) {
1468 if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
1469 // Inline the value as an immediate integer in GLSL (AOFFI arguments are required
1470 // to be constant by the standard).
1471 return std::to_string(static_cast<s32>(immediate->GetValue()));
1472 } else if (device.HasVariableAoffi()) {
1473 // Avoid using variable AOFFI on unsupported devices.
1474 return Visit(value).AsInt();
1475 } else {
1476 // Insert 0 on devices not supporting variable AOFFI.
1477 return "0";
1478 }
1479 }
1480
1481 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
1482 if (aoffi.empty()) {
1483 return {};
1484 }
1485 constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
1486 std::string expr = ", ";
1487 expr += coord_constructors.at(aoffi.size() - 1);
1488 expr += '(';
1489
1490 for (std::size_t index = 0; index < aoffi.size(); ++index) {
1491 expr += ReadTextureOffset(aoffi.at(index));
1492 if (index + 1 < aoffi.size()) {
1493 expr += ", ";
1494 }
1495 }
1496 expr += ')';
1497
1498 return expr;
1499 }
1500
1501 std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
1502 static constexpr std::size_t num_vectors = 4;
1503 ASSERT(ptp.size() == num_vectors * 2);
1504
1505 std::string expr = ", ivec2[](";
1506 for (std::size_t vector = 0; vector < num_vectors; ++vector) {
1507 const bool has_next = vector + 1 < num_vectors;
1508 expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
1509 ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
1510 }
1511 expr += ')';
1512 return expr;
1513 }
1514
1515 std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
1516 if (derivates.empty()) {
1517 return {};
1518 }
1519 constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
1520 std::string expr = ", ";
1521 const std::size_t components = derivates.size() / 2;
1522 std::string dx = coord_constructors.at(components - 1);
1523 std::string dy = coord_constructors.at(components - 1);
1524 dx += '(';
1525 dy += '(';
1526
1527 for (std::size_t index = 0; index < components; ++index) {
1528 const auto& operand_x{derivates.at(index * 2)};
1529 const auto& operand_y{derivates.at(index * 2 + 1)};
1530 dx += Visit(operand_x).AsFloat();
1531 dy += Visit(operand_y).AsFloat();
1532
1533 if (index + 1 < components) {
1534 dx += ", ";
1535 dy += ", ";
1536 }
1537 }
1538 dx += ')';
1539 dy += ')';
1540 expr += dx + ", " + dy;
1541
1542 return expr;
1543 }
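// Editor's note (illustrative sketch): for a 2D textureGrad the fragment appended above looks
// like ", vec2(dpdx_x, dpdx_y), vec2(dpdy_x, dpdy_y)", with each component produced by
// Visit(...).AsFloat(); the dpdx_*/dpdy_* names are placeholders, not actual temporaries.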
1544
1545 std::string BuildIntegerCoordinates(Operation operation) {
1546 constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
1547 const std::size_t coords_count{operation.GetOperandsCount()};
1548 std::string expr = constructors.at(coords_count - 1);
1549 for (std::size_t i = 0; i < coords_count; ++i) {
1550 expr += VisitOperand(operation, i).AsInt();
1551 if (i + 1 < coords_count) {
1552 expr += ", ";
1553 }
1554 }
1555 expr += ')';
1556 return expr;
1557 }
1558
1559 std::string BuildImageValues(Operation operation) {
1560 constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
1561 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1562
1563 const std::size_t values_count{meta.values.size()};
1564 std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
1565 for (std::size_t i = 0; i < values_count; ++i) {
1566 expr += Visit(meta.values.at(i)).AsUint();
1567 if (i + 1 < values_count) {
1568 expr += ", ";
1569 }
1570 }
1571 expr += ')';
1572 return expr;
1573 }
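// Editor's note (illustrative sketch): BuildIntegerCoordinates and BuildImageValues emit the
// argument lists used by the image operations below, e.g. "ivec2(x, y)" for the coordinates and
// "uvec4(r, g, b, a)" for the store values; the operand names here are placeholders.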
1574
1575 Expression Assign(Operation operation) {
1576 const Node& dest = operation[0];
1577 const Node& src = operation[1];
1578
1579 Expression target;
1580 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1581 if (gpr->GetIndex() == Register::ZeroIndex) {
1582 // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source
1583 // as it might have side effects.
1584 code.AddLine("{};", Visit(src).GetCode());
1585 return {};
1586 }
1587 target = {GetRegister(gpr->GetIndex()), Type::Float};
1588 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1589 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
1590 auto output = GetOutputAttribute(abuf);
1591 if (!output) {
1592 return {};
1593 }
1594 target = std::move(*output);
1595 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1596 target = {
1597 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1598 Type::Uint};
1599 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1600 ASSERT(stage == ShaderType::Compute);
1601 target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1602 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1603 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1604 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
1605 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1606 target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1607 Type::Uint};
1608 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1609 target = {GetCustomVariable(cv->GetIndex()), Type::Float};
1610 } else {
1611 UNREACHABLE_MSG("Assign called without a proper target");
1612 }
1613
1614 code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
1615 return {};
1616 }
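// Editor's note (illustrative sketch): a local-memory store assembled by Assign above looks like
// "lmem[addr >> 2] = value;" (with the stage suffix appended to "lmem" when present), while
// register writes become plain "gprN = value;" assignments; "addr"/"value" stand in for the
// visited source expressions.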
1617
1618 template <Type type>
1619 Expression Add(Operation operation) {
1620 return GenerateBinaryInfix(operation, "+", type, type, type);
1621 }
1622
1623 template <Type type>
1624 Expression Mul(Operation operation) {
1625 return GenerateBinaryInfix(operation, "*", type, type, type);
1626 }
1627
1628 template <Type type>
1629 Expression Div(Operation operation) {
1630 return GenerateBinaryInfix(operation, "/", type, type, type);
1631 }
1632
1633 template <Type type>
1634 Expression Fma(Operation operation) {
1635 return GenerateTernary(operation, "fma", type, type, type, type);
1636 }
1637
1638 template <Type type>
1639 Expression Negate(Operation operation) {
1640 return GenerateUnary(operation, "-", type, type);
1641 }
1642
1643 template <Type type>
1644 Expression Absolute(Operation operation) {
1645 return GenerateUnary(operation, "abs", type, type);
1646 }
1647
1648 Expression FClamp(Operation operation) {
1649 return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
1650 Type::Float);
1651 }
1652
1653 Expression FCastHalf0(Operation operation) {
1654 return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1655 }
1656
1657 Expression FCastHalf1(Operation operation) {
1658 return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1659 }
1660
1661 template <Type type>
1662 Expression Min(Operation operation) {
1663 return GenerateBinaryCall(operation, "min", type, type, type);
1664 }
1665
1666 template <Type type>
1667 Expression Max(Operation operation) {
1668 return GenerateBinaryCall(operation, "max", type, type, type);
1669 }
1670
1671 Expression Select(Operation operation) {
1672 const std::string condition = Visit(operation[0]).AsBool();
1673 const std::string true_case = Visit(operation[1]).AsUint();
1674 const std::string false_case = Visit(operation[2]).AsUint();
1675 std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
1676
1677 return ApplyPrecise(operation, std::move(op_str), Type::Uint);
1678 }
1679
1680 Expression FCos(Operation operation) {
1681 return GenerateUnary(operation, "cos", Type::Float, Type::Float);
1682 }
1683
1684 Expression FSin(Operation operation) {
1685 return GenerateUnary(operation, "sin", Type::Float, Type::Float);
1686 }
1687
1688 Expression FExp2(Operation operation) {
1689 return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
1690 }
1691
1692 Expression FLog2(Operation operation) {
1693 return GenerateUnary(operation, "log2", Type::Float, Type::Float);
1694 }
1695
1696 Expression FInverseSqrt(Operation operation) {
1697 return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
1698 }
1699
1700 Expression FSqrt(Operation operation) {
1701 return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
1702 }
1703
1704 Expression FRoundEven(Operation operation) {
1705 return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
1706 }
1707
1708 Expression FFloor(Operation operation) {
1709 return GenerateUnary(operation, "floor", Type::Float, Type::Float);
1710 }
1711
1712 Expression FCeil(Operation operation) {
1713 return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
1714 }
1715
1716 Expression FTrunc(Operation operation) {
1717 return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
1718 }
1719
1720 template <Type type>
1721 Expression FCastInteger(Operation operation) {
1722 return GenerateUnary(operation, "float", Type::Float, type);
1723 }
1724
1725 Expression FSwizzleAdd(Operation operation) {
1726 const std::string op_a = VisitOperand(operation, 0).AsFloat();
1727 const std::string op_b = VisitOperand(operation, 1).AsFloat();
1728
1729 if (!device.HasShaderBallot()) {
1730 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1731 return {fmt::format("{} + {}", op_a, op_b), Type::Float};
1732 }
1733
1734 const std::string instr_mask = VisitOperand(operation, 2).AsUint();
1735 const std::string mask = code.GenerateTemporary();
1736 code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
1737 instr_mask);
1738
1739 const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
1740 const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
1741 return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
1742 Type::Float};
1743 }
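// Editor's note (illustrative sketch): on capable devices FSwizzleAdd first emits a line such as
// "uint tmp = (instr_mask >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;" and then returns
// "((a * fswzadd_modifiers_a[tmp]) + (b * fswzadd_modifiers_b[tmp]))"; "tmp", "a" and "b" are
// placeholders for generated temporaries and operand expressions.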
1744
1745 Expression ICastFloat(Operation operation) {
1746 return GenerateUnary(operation, "int", Type::Int, Type::Float);
1747 }
1748
1749 Expression ICastUnsigned(Operation operation) {
1750 return GenerateUnary(operation, "int", Type::Int, Type::Uint);
1751 }
1752
1753 template <Type type>
1754 Expression LogicalShiftLeft(Operation operation) {
1755 return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
1756 }
1757
1758 Expression ILogicalShiftRight(Operation operation) {
1759 const std::string op_a = VisitOperand(operation, 0).AsUint();
1760 const std::string op_b = VisitOperand(operation, 1).AsUint();
1761 std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
1762
1763 return ApplyPrecise(operation, std::move(op_str), Type::Int);
1764 }
1765
1766 Expression IArithmeticShiftRight(Operation operation) {
1767 return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
1768 }
1769
1770 template <Type type>
1771 Expression BitwiseAnd(Operation operation) {
1772 return GenerateBinaryInfix(operation, "&", type, type, type);
1773 }
1774
1775 template <Type type>
1776 Expression BitwiseOr(Operation operation) {
1777 return GenerateBinaryInfix(operation, "|", type, type, type);
1778 }
1779
1780 template <Type type>
1781 Expression BitwiseXor(Operation operation) {
1782 return GenerateBinaryInfix(operation, "^", type, type, type);
1783 }
1784
1785 template <Type type>
1786 Expression BitwiseNot(Operation operation) {
1787 return GenerateUnary(operation, "~", type, type);
1788 }
1789
1790 Expression UCastFloat(Operation operation) {
1791 return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
1792 }
1793
1794 Expression UCastSigned(Operation operation) {
1795 return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
1796 }
1797
1798 Expression UShiftRight(Operation operation) {
1799 return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
1800 }
1801
1802 template <Type type>
1803 Expression BitfieldInsert(Operation operation) {
1804 return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
1805 Type::Int);
1806 }
1807
1808 template <Type type>
1809 Expression BitfieldExtract(Operation operation) {
1810 return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
1811 }
1812
1813 template <Type type>
1814 Expression BitCount(Operation operation) {
1815 return GenerateUnary(operation, "bitCount", type, type);
1816 }
1817
1818 template <Type type>
1819 Expression BitMSB(Operation operation) {
1820 return GenerateUnary(operation, "findMSB", type, type);
1821 }
1822
1823 Expression HNegate(Operation operation) {
1824 const auto GetNegate = [&](std::size_t index) {
1825 return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
1826 };
1827 return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
1828 GetNegate(1), GetNegate(2)),
1829 Type::HalfFloat};
1830 }
1831
1832 Expression HClamp(Operation operation) {
1833 const std::string value = VisitOperand(operation, 0).AsHalfFloat();
1834 const std::string min = VisitOperand(operation, 1).AsFloat();
1835 const std::string max = VisitOperand(operation, 2).AsFloat();
1836 std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
1837
1838 return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
1839 }
1840
1841 Expression HCastFloat(Operation operation) {
1842 return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
1843 Type::HalfFloat};
1844 }
1845
1846 Expression HUnpack(Operation operation) {
1847 Expression operand = VisitOperand(operation, 0);
1848 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1849 case Tegra::Shader::HalfType::H0_H1:
1850 return operand;
1851 case Tegra::Shader::HalfType::F32:
1852 return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
1853 case Tegra::Shader::HalfType::H0_H0:
1854 return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
1855 case Tegra::Shader::HalfType::H1_H1:
1856 return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
1857 }
1858 UNREACHABLE();
1859 return {"0", Type::Int};
1860 }
1861
1862 Expression HMergeF32(Operation operation) {
1863 return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1864 }
1865
1866 Expression HMergeH0(Operation operation) {
1867 const std::string dest = VisitOperand(operation, 0).AsUint();
1868 const std::string src = VisitOperand(operation, 1).AsUint();
1869 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
1870 Type::HalfFloat};
1871 }
1872
1873 Expression HMergeH1(Operation operation) {
1874 const std::string dest = VisitOperand(operation, 0).AsUint();
1875 const std::string src = VisitOperand(operation, 1).AsUint();
1876 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
1877 Type::HalfFloat};
1878 }
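// Editor's note: HMergeH0 keeps the high half of operand 0 and takes the low half from operand 1
// ("vec2(unpackHalf2x16(src).x, unpackHalf2x16(dest).y)"), while HMergeH1 does the opposite;
// both operands are visited as raw uints holding packed half pairs.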
1879
1880 Expression HPack2(Operation operation) {
1881 return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
1882 VisitOperand(operation, 1).AsFloat()),
1883 Type::HalfFloat};
1884 }
1885
1886 template <const std::string_view& op, Type type, bool unordered = false>
1887 Expression Comparison(Operation operation) {
1888 static_assert(!unordered || type == Type::Float);
1889
1890 Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
1891
1892 if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
1893 // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
1894 // and Nvidia's proprietary stacks. Manually force an ordered comparison.
1895 return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
1896 VisitOperand(operation, 0).AsFloat(),
1897 VisitOperand(operation, 1).AsFloat()),
1898 Type::Bool};
1899 }
1900 if constexpr (!unordered) {
1901 return expr;
1902 }
1903 // Unordered comparisons are always true for NaN operands.
1904 return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
1905 VisitOperand(operation, 0).AsFloat(),
1906 VisitOperand(operation, 1).AsFloat()),
1907 Type::Bool};
1908 }
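// Editor's note (illustrative sketch): for Type::Float an ordered "!=" is rewritten above into
// "((a != b) && !isnan(a) && !isnan(b))", while the unordered variants become
// "((a < b) || isnan(a) || isnan(b))" and similar; "a"/"b" are placeholders for the visited
// operands.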
1909
1910 Expression FOrdered(Operation operation) {
1911 return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
1912 VisitOperand(operation, 1).AsFloat()),
1913 Type::Bool};
1914 }
1915
1916 Expression FUnordered(Operation operation) {
1917 return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
1918 VisitOperand(operation, 1).AsFloat()),
1919 Type::Bool};
1920 }
1921
1922 Expression LogicalAddCarry(Operation operation) {
1923 const std::string carry = code.GenerateTemporary();
1924 code.AddLine("uint {};", carry);
1925 code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
1926 VisitOperand(operation, 1).AsUint(), carry);
1927 return {fmt::format("({} != 0)", carry), Type::Bool};
1928 }
1929
1930 Expression LogicalAssign(Operation operation) {
1931 const Node& dest = operation[0];
1932 const Node& src = operation[1];
1933
1934 std::string target;
1935
1936 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1937 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1938
1939 const auto index = pred->GetIndex();
1940 switch (index) {
1941 case Tegra::Shader::Pred::NeverExecute:
1942 case Tegra::Shader::Pred::UnusedIndex:
1943 // Writing to these predicates is a no-op
1944 return {};
1945 }
1946 target = GetPredicate(index);
1947 } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
1948 target = GetInternalFlag(flag->GetFlag());
1949 }
1950
1951 code.AddLine("{} = {};", target, Visit(src).AsBool());
1952 return {};
1953 }
1954
1955 Expression LogicalAnd(Operation operation) {
1956 return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
1957 }
1958
1959 Expression LogicalOr(Operation operation) {
1960 return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
1961 }
1962
1963 Expression LogicalXor(Operation operation) {
1964 return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
1965 }
1966
1967 Expression LogicalNegate(Operation operation) {
1968 return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
1969 }
1970
1971 Expression LogicalPick2(Operation operation) {
1972 return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
1973 VisitOperand(operation, 1).AsUint()),
1974 Type::Bool};
1975 }
1976
1977 Expression LogicalAnd2(Operation operation) {
1978 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1979 }
1980
1981 template <bool with_nan>
1982 Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
1983 Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
1984 Type::HalfFloat, Type::HalfFloat);
1985 if constexpr (!with_nan) {
1986 return comparison;
1987 }
1988 return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
1989 VisitOperand(operation, 0).AsHalfFloat(),
1990 VisitOperand(operation, 1).AsHalfFloat()),
1991 Type::Bool2};
1992 }
1993
1994 template <bool with_nan>
1995 Expression Logical2HLessThan(Operation operation) {
1996 return GenerateHalfComparison<with_nan>(operation, "lessThan");
1997 }
1998
1999 template <bool with_nan>
2000 Expression Logical2HEqual(Operation operation) {
2001 return GenerateHalfComparison<with_nan>(operation, "equal");
2002 }
2003
2004 template <bool with_nan>
2005 Expression Logical2HLessEqual(Operation operation) {
2006 return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
2007 }
2008
2009 template <bool with_nan>
2010 Expression Logical2HGreaterThan(Operation operation) {
2011 return GenerateHalfComparison<with_nan>(operation, "greaterThan");
2012 }
2013
2014 template <bool with_nan>
2015 Expression Logical2HNotEqual(Operation operation) {
2016 return GenerateHalfComparison<with_nan>(operation, "notEqual");
2017 }
2018
2019 template <bool with_nan>
2020 Expression Logical2HGreaterEqual(Operation operation) {
2021 return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
2022 }
2023
2024 Expression Texture(Operation operation) {
2025 const auto meta = std::get<MetaTexture>(operation.GetMeta());
2026 const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
2027 meta.sampler.is_array && meta.sampler.is_shadow;
2028 // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
2029 const std::vector<TextureIR> extras{
2030 TextureOffset{},
2031 TextureArgument{Type::Float, meta.bias},
2032 };
2033 std::string expr = GenerateTexture(operation, "", extras, separate_dc);
2034 if (meta.sampler.is_shadow) {
2035 expr = fmt::format("vec4({})", expr);
2036 }
2037 return {expr + GetSwizzle(meta.element), Type::Float};
2038 }
2039
2040 Expression TextureLod(Operation operation) {
2041 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2042 ASSERT(meta);
2043
2044 std::string expr{};
2045
2046 if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
2047 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
2048 meta->sampler.type == TextureType::TextureCube)) {
2049 LOG_ERROR(Render_OpenGL,
2050 "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
2051 expr = GenerateTexture(operation, "Lod", {});
2052 } else {
2053 expr = GenerateTexture(operation, "Lod",
2054 {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
2055 }
2056
2057 if (meta->sampler.is_shadow) {
2058 expr = "vec4(" + expr + ')';
2059 }
2060 return {expr + GetSwizzle(meta->element), Type::Float};
2061 }
2062
2063 Expression TextureGather(Operation operation) {
2064 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
2065
2066 const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
2067 const bool separate_dc = meta.sampler.is_shadow;
2068
2069 std::vector<TextureIR> ir_;
2070 if (meta.sampler.is_shadow) {
2071 ir_ = {TextureOffset{}};
2072 } else {
2073 ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
2074 }
2075 return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
2076 Type::Float};
2077 }
2078
2079 Expression TextureQueryDimensions(Operation operation) {
2080 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2081 ASSERT(meta);
2082
2083 const std::string sampler = GetSampler(meta->sampler);
2084 const std::string lod = VisitOperand(operation, 0).AsInt();
2085
2086 switch (meta->element) {
2087 case 0:
2088 case 1:
2089 return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
2090 Type::Int};
2091 case 3:
2092 return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
2093 }
2094 UNREACHABLE();
2095 return {"0", Type::Int};
2096 }
2097
2098 Expression TextureQueryLod(Operation operation) {
2099 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2100 ASSERT(meta);
2101
2102 if (meta->element < 2) {
2103 return {fmt::format("int(({} * vec2(256)){})",
2104 GenerateTexture(operation, "QueryLod", {}),
2105 GetSwizzle(meta->element)),
2106 Type::Int};
2107 }
2108 return {"0", Type::Int};
2109 }
2110
2111 Expression TexelFetch(Operation operation) {
2112 constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
2113 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2114 ASSERT(meta);
2115 UNIMPLEMENTED_IF(meta->sampler.is_array);
2116 const std::size_t count = operation.GetOperandsCount();
2117
2118 std::string expr = "texelFetch(";
2119 expr += GetSampler(meta->sampler);
2120 expr += ", ";
2121
2122 expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
2123 expr += '(';
2124 for (std::size_t i = 0; i < count; ++i) {
2125 if (i > 0) {
2126 expr += ", ";
2127 }
2128 expr += VisitOperand(operation, i).AsInt();
2129 }
2130 if (meta->array) {
2131 expr += ", ";
2132 expr += Visit(meta->array).AsInt();
2133 }
2134 expr += ')';
2135
2136 if (meta->lod && !meta->sampler.is_buffer) {
2137 expr += ", ";
2138 expr += Visit(meta->lod).AsInt();
2139 }
2140 expr += ')';
2141 expr += GetSwizzle(meta->element);
2142
2143 return {std::move(expr), Type::Float};
2144 }
2145
2146 Expression TextureGradient(Operation operation) {
2147 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
2148 std::string expr =
2149 GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
2150 return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
2151 }
2152
2153 Expression ImageLoad(Operation operation) {
2154 if (!device.HasImageLoadFormatted()) {
2155 LOG_ERROR(Render_OpenGL,
2156 "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
2157 return {"0", Type::Int};
2158 }
2159
2160 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2161 return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
2162 BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
2163 Type::Uint};
2164 }
2165
2166 Expression ImageStore(Operation operation) {
2167 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2168 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
2169 BuildIntegerCoordinates(operation), BuildImageValues(operation));
2170 return {};
2171 }
2172
2173 template <const std::string_view& opname>
2174 Expression AtomicImage(Operation operation) {
2175 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2176 ASSERT(meta.values.size() == 1);
2177
2178 return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
2179 BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
2180 Type::Uint};
2181 }
2182
2183 template <const std::string_view& opname, Type type>
2184 Expression Atomic(Operation operation) {
2185 if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
2186 UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
2187 return {};
2188 }
2189 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
2190 Visit(operation[1]).AsUint()),
2191 Type::Uint};
2192 }
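// Editor's note (illustrative sketch): Atomic<Func::Add, Type::Uint> emits a call such as
// "atomicAdd(target, value)", where "target" is the raw code of operand 0 (e.g. a shared- or
// global-memory lvalue) and "value" the unsigned operand 1; signed min/max remain stubbed above.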
2193
2194 template <const std::string_view& opname, Type type>
2195 Expression Reduce(Operation operation) {
2196 code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
2197 return {};
2198 }
2199
2200 Expression Branch(Operation operation) {
2201 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2202 UNIMPLEMENTED_IF(!target);
2203
2204 code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
2205 code.AddLine("break;");
2206 return {};
2207 }
2208
2209 Expression BranchIndirect(Operation operation) {
2210 const std::string op_a = VisitOperand(operation, 0).AsUint();
2211
2212 code.AddLine("jmp_to = {};", op_a);
2213 code.AddLine("break;");
2214 return {};
2215 }
2216
2217 Expression PushFlowStack(Operation operation) {
2218 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2219 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2220 UNIMPLEMENTED_IF(!target);
2221
2222 code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
2223 target->GetValue());
2224 return {};
2225 }
2226
2227 Expression PopFlowStack(Operation operation) {
2228 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2229 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
2230 code.AddLine("break;");
2231 return {};
2232 }
2233
2234 void PreExit() {
2235 if (stage != ShaderType::Fragment) {
2236 return;
2237 }
2238 const auto& used_registers = ir.GetRegisters();
2239 const auto SafeGetRegister = [&](u32 reg) -> Expression {
2240 // TODO(Rodrigo): Replace with contains once C++20 releases
2241 if (used_registers.find(reg) != used_registers.end()) {
2242 return {GetRegister(reg), Type::Float};
2243 }
2244 return {"0.0f", Type::Float};
2245 };
2246
2247 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
2248
2249 // Write the color outputs using the data in the shader registers; disabled
2250 // render targets/components are skipped in the register assignment.
2251 u32 current_reg = 0;
2252 for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
2253 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
2254 for (u32 component = 0; component < 4; ++component) {
2255 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
2256 code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
2257 SafeGetRegister(current_reg).AsFloat());
2258 ++current_reg;
2259 }
2260 }
2261 }
2262 if (header.ps.omap.depth) {
2263 // The depth output is always 2 registers after the last color output, and current_reg
2264 // already contains one past the last color register.
2265 code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
2266 }
2267 }
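// Editor's note (illustrative sketch, assuming GetColorSwizzle maps components to .r/.g/.b/.a):
// with render target 0 fully enabled, PreExit emits lines like "frag_color0.r = gpr0;" through
// "frag_color0.a = gpr3;", plus for example "gl_FragDepth = gpr5;" when the depth output is
// present and that register is used (otherwise "0.0f" is substituted).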
2268
2269 Expression Exit(Operation operation) {
2270 PreExit();
2271 code.AddLine("return;");
2272 return {};
2273 }
2274
2275 Expression Discard(Operation operation) {
2276 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
2277 // about unexecuted instructions that may follow this.
2278 code.AddLine("if (true) {{");
2279 ++code.scope;
2280 code.AddLine("discard;");
2281 --code.scope;
2282 code.AddLine("}}");
2283 return {};
2284 }
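// Editor's note: the emitted GLSL above is literally "if (true) { discard; }"; the doubled braces
// in the AddLine calls are fmt escapes, not part of the output.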
2285
2286 Expression EmitVertex(Operation operation) {
2287 ASSERT_MSG(stage == ShaderType::Geometry,
2288 "EmitVertex is expected to be used in a geometry shader.");
2289 code.AddLine("EmitVertex();");
2290 return {};
2291 }
2292
2293 Expression EndPrimitive(Operation operation) {
2294 ASSERT_MSG(stage == ShaderType::Geometry,
2295 "EndPrimitive is expected to be used in a geometry shader.");
2296 code.AddLine("EndPrimitive();");
2297 return {};
2298 }
2299
2300 Expression InvocationId(Operation operation) {
2301 return {"gl_InvocationID", Type::Int};
2302 }
2303
2304 Expression YNegate(Operation operation) {
2305 // Y_NEGATE is mapped to this uniform value
2306 return {"gl_FrontMaterial.ambient.a", Type::Float};
2307 }
2308
2309 template <u32 element>
2310 Expression LocalInvocationId(Operation) {
2311 return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
2312 }
2313
2314 template <u32 element>
2315 Expression WorkGroupId(Operation) {
2316 return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
2317 }
2318
2319 Expression BallotThread(Operation operation) {
2320 const std::string value = VisitOperand(operation, 0).AsBool();
2321 if (!device.HasWarpIntrinsics()) {
2322 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2323 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
2324 // one.
2325 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
2326 }
2327 return {fmt::format("ballotThreadNV({})", value), Type::Uint};
2328 }
2329
2330 Expression Vote(Operation operation, const char* func) {
2331 const std::string value = VisitOperand(operation, 0).AsBool();
2332 if (!device.HasWarpIntrinsics()) {
2333 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2334 // Stub with a warp size of one.
2335 return {value, Type::Bool};
2336 }
2337 return {fmt::format("{}({})", func, value), Type::Bool};
2338 }
2339
2340 Expression VoteAll(Operation operation) {
2341 return Vote(operation, "allThreadsNV");
2342 }
2343
2344 Expression VoteAny(Operation operation) {
2345 return Vote(operation, "anyThreadNV");
2346 }
2347
2348 Expression VoteEqual(Operation operation) {
2349 if (!device.HasWarpIntrinsics()) {
2350 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2351 // We must return true here: this stubs a theoretical warp size of 1, and a single
2352 // thread always produces an equal result across all of its votes.
2353 return {"true", Type::Bool};
2354 }
2355 return Vote(operation, "allThreadsEqualNV");
2356 }
2357
2358 Expression ThreadId(Operation operation) {
2359 if (!device.HasShaderBallot()) {
2360 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2361 return {"0U", Type::Uint};
2362 }
2363 return {"gl_SubGroupInvocationARB", Type::Uint};
2364 }
2365
2366 template <const std::string_view& comparison>
2367 Expression ThreadMask(Operation) {
2368 if (device.HasWarpIntrinsics()) {
2369 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2370 }
2371 if (device.HasShaderBallot()) {
2372 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2373 }
2374 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2375 return {"0U", Type::Uint};
2376 }
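// Editor's note (illustrative sketch): ThreadMask<Func::Eq> resolves to "gl_ThreadEqMaskNV" on
// devices with Nvidia warp intrinsics, to "uint(gl_SubGroupEqMaskARB)" with ARB shader ballot,
// and to the "0U" stub otherwise.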
2377
2378 Expression ShuffleIndexed(Operation operation) {
2379 std::string value = VisitOperand(operation, 0).AsFloat();
2380
2381 if (!device.HasShaderBallot()) {
2382 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2383 return {std::move(value), Type::Float};
2384 }
2385
2386 const std::string index = VisitOperand(operation, 1).AsUint();
2387 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
2388 }
2389
2390 Expression Barrier(Operation) {
2391 if (!ir.IsDecompiled()) {
2392 LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
2393 return {};
2394 }
2395 code.AddLine("barrier();");
2396 return {};
2397 }
2398
2399 Expression MemoryBarrierGroup(Operation) {
2400 code.AddLine("groupMemoryBarrier();");
2401 return {};
2402 }
2403
2404 Expression MemoryBarrierGlobal(Operation) {
2405 code.AddLine("memoryBarrier();");
2406 return {};
2407 }
2408
2409 struct Func final {
2410 Func() = delete;
2411 ~Func() = delete;
2412
2413 static constexpr std::string_view LessThan = "<";
2414 static constexpr std::string_view Equal = "==";
2415 static constexpr std::string_view LessEqual = "<=";
2416 static constexpr std::string_view GreaterThan = ">";
2417 static constexpr std::string_view NotEqual = "!=";
2418 static constexpr std::string_view GreaterEqual = ">=";
2419
2420 static constexpr std::string_view Eq = "Eq";
2421 static constexpr std::string_view Ge = "Ge";
2422 static constexpr std::string_view Gt = "Gt";
2423 static constexpr std::string_view Le = "Le";
2424 static constexpr std::string_view Lt = "Lt";
2425
2426 static constexpr std::string_view Add = "Add";
2427 static constexpr std::string_view Min = "Min";
2428 static constexpr std::string_view Max = "Max";
2429 static constexpr std::string_view And = "And";
2430 static constexpr std::string_view Or = "Or";
2431 static constexpr std::string_view Xor = "Xor";
2432 static constexpr std::string_view Exchange = "Exchange";
2433 };
2434
2435 static constexpr std::array operation_decompilers = {
2436 &GLSLDecompiler::Assign,
2437
2438 &GLSLDecompiler::Select,
2439
2440 &GLSLDecompiler::Add<Type::Float>,
2441 &GLSLDecompiler::Mul<Type::Float>,
2442 &GLSLDecompiler::Div<Type::Float>,
2443 &GLSLDecompiler::Fma<Type::Float>,
2444 &GLSLDecompiler::Negate<Type::Float>,
2445 &GLSLDecompiler::Absolute<Type::Float>,
2446 &GLSLDecompiler::FClamp,
2447 &GLSLDecompiler::FCastHalf0,
2448 &GLSLDecompiler::FCastHalf1,
2449 &GLSLDecompiler::Min<Type::Float>,
2450 &GLSLDecompiler::Max<Type::Float>,
2451 &GLSLDecompiler::FCos,
2452 &GLSLDecompiler::FSin,
2453 &GLSLDecompiler::FExp2,
2454 &GLSLDecompiler::FLog2,
2455 &GLSLDecompiler::FInverseSqrt,
2456 &GLSLDecompiler::FSqrt,
2457 &GLSLDecompiler::FRoundEven,
2458 &GLSLDecompiler::FFloor,
2459 &GLSLDecompiler::FCeil,
2460 &GLSLDecompiler::FTrunc,
2461 &GLSLDecompiler::FCastInteger<Type::Int>,
2462 &GLSLDecompiler::FCastInteger<Type::Uint>,
2463 &GLSLDecompiler::FSwizzleAdd,
2464
2465 &GLSLDecompiler::Add<Type::Int>,
2466 &GLSLDecompiler::Mul<Type::Int>,
2467 &GLSLDecompiler::Div<Type::Int>,
2468 &GLSLDecompiler::Negate<Type::Int>,
2469 &GLSLDecompiler::Absolute<Type::Int>,
2470 &GLSLDecompiler::Min<Type::Int>,
2471 &GLSLDecompiler::Max<Type::Int>,
2472
2473 &GLSLDecompiler::ICastFloat,
2474 &GLSLDecompiler::ICastUnsigned,
2475 &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
2476 &GLSLDecompiler::ILogicalShiftRight,
2477 &GLSLDecompiler::IArithmeticShiftRight,
2478 &GLSLDecompiler::BitwiseAnd<Type::Int>,
2479 &GLSLDecompiler::BitwiseOr<Type::Int>,
2480 &GLSLDecompiler::BitwiseXor<Type::Int>,
2481 &GLSLDecompiler::BitwiseNot<Type::Int>,
2482 &GLSLDecompiler::BitfieldInsert<Type::Int>,
2483 &GLSLDecompiler::BitfieldExtract<Type::Int>,
2484 &GLSLDecompiler::BitCount<Type::Int>,
2485 &GLSLDecompiler::BitMSB<Type::Int>,
2486
2487 &GLSLDecompiler::Add<Type::Uint>,
2488 &GLSLDecompiler::Mul<Type::Uint>,
2489 &GLSLDecompiler::Div<Type::Uint>,
2490 &GLSLDecompiler::Min<Type::Uint>,
2491 &GLSLDecompiler::Max<Type::Uint>,
2492 &GLSLDecompiler::UCastFloat,
2493 &GLSLDecompiler::UCastSigned,
2494 &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
2495 &GLSLDecompiler::UShiftRight,
2496 &GLSLDecompiler::UShiftRight,
2497 &GLSLDecompiler::BitwiseAnd<Type::Uint>,
2498 &GLSLDecompiler::BitwiseOr<Type::Uint>,
2499 &GLSLDecompiler::BitwiseXor<Type::Uint>,
2500 &GLSLDecompiler::BitwiseNot<Type::Uint>,
2501 &GLSLDecompiler::BitfieldInsert<Type::Uint>,
2502 &GLSLDecompiler::BitfieldExtract<Type::Uint>,
2503 &GLSLDecompiler::BitCount<Type::Uint>,
2504 &GLSLDecompiler::BitMSB<Type::Uint>,
2505
2506 &GLSLDecompiler::Add<Type::HalfFloat>,
2507 &GLSLDecompiler::Mul<Type::HalfFloat>,
2508 &GLSLDecompiler::Fma<Type::HalfFloat>,
2509 &GLSLDecompiler::Absolute<Type::HalfFloat>,
2510 &GLSLDecompiler::HNegate,
2511 &GLSLDecompiler::HClamp,
2512 &GLSLDecompiler::HCastFloat,
2513 &GLSLDecompiler::HUnpack,
2514 &GLSLDecompiler::HMergeF32,
2515 &GLSLDecompiler::HMergeH0,
2516 &GLSLDecompiler::HMergeH1,
2517 &GLSLDecompiler::HPack2,
2518
2519 &GLSLDecompiler::LogicalAssign,
2520 &GLSLDecompiler::LogicalAnd,
2521 &GLSLDecompiler::LogicalOr,
2522 &GLSLDecompiler::LogicalXor,
2523 &GLSLDecompiler::LogicalNegate,
2524 &GLSLDecompiler::LogicalPick2,
2525 &GLSLDecompiler::LogicalAnd2,
2526
2527 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
2528 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
2529 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
2530 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
2531 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
2532 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
2533 &GLSLDecompiler::FOrdered,
2534 &GLSLDecompiler::FUnordered,
2535 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
2536 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
2537 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
2538 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
2539 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
2540 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
2541
2542 &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
2543 &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
2544 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
2545 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
2546 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
2547 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
2548
2549 &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
2550 &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
2551 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
2552 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
2553 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
2554 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
2555
2556 &GLSLDecompiler::LogicalAddCarry,
2557
2558 &GLSLDecompiler::Logical2HLessThan<false>,
2559 &GLSLDecompiler::Logical2HEqual<false>,
2560 &GLSLDecompiler::Logical2HLessEqual<false>,
2561 &GLSLDecompiler::Logical2HGreaterThan<false>,
2562 &GLSLDecompiler::Logical2HNotEqual<false>,
2563 &GLSLDecompiler::Logical2HGreaterEqual<false>,
2564 &GLSLDecompiler::Logical2HLessThan<true>,
2565 &GLSLDecompiler::Logical2HEqual<true>,
2566 &GLSLDecompiler::Logical2HLessEqual<true>,
2567 &GLSLDecompiler::Logical2HGreaterThan<true>,
2568 &GLSLDecompiler::Logical2HNotEqual<true>,
2569 &GLSLDecompiler::Logical2HGreaterEqual<true>,
2570
2571 &GLSLDecompiler::Texture,
2572 &GLSLDecompiler::TextureLod,
2573 &GLSLDecompiler::TextureGather,
2574 &GLSLDecompiler::TextureQueryDimensions,
2575 &GLSLDecompiler::TextureQueryLod,
2576 &GLSLDecompiler::TexelFetch,
2577 &GLSLDecompiler::TextureGradient,
2578
2579 &GLSLDecompiler::ImageLoad,
2580 &GLSLDecompiler::ImageStore,
2581
2582 &GLSLDecompiler::AtomicImage<Func::Add>,
2583 &GLSLDecompiler::AtomicImage<Func::And>,
2584 &GLSLDecompiler::AtomicImage<Func::Or>,
2585 &GLSLDecompiler::AtomicImage<Func::Xor>,
2586 &GLSLDecompiler::AtomicImage<Func::Exchange>,
2587
2588 &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
2589 &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
2590 &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
2591 &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
2592 &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
2593 &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
2594 &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
2595
2596 &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
2597 &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
2598 &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
2599 &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
2600 &GLSLDecompiler::Atomic<Func::And, Type::Int>,
2601 &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
2602 &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
2603
2604 &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
2605 &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
2606 &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
2607 &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
2608 &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
2609 &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
2610
2611 &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
2612 &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
2613 &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
2614 &GLSLDecompiler::Reduce<Func::And, Type::Int>,
2615 &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
2616 &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
2617
2618 &GLSLDecompiler::Branch,
2619 &GLSLDecompiler::BranchIndirect,
2620 &GLSLDecompiler::PushFlowStack,
2621 &GLSLDecompiler::PopFlowStack,
2622 &GLSLDecompiler::Exit,
2623 &GLSLDecompiler::Discard,
2624
2625 &GLSLDecompiler::EmitVertex,
2626 &GLSLDecompiler::EndPrimitive,
2627
2628 &GLSLDecompiler::InvocationId,
2629 &GLSLDecompiler::YNegate,
2630 &GLSLDecompiler::LocalInvocationId<0>,
2631 &GLSLDecompiler::LocalInvocationId<1>,
2632 &GLSLDecompiler::LocalInvocationId<2>,
2633 &GLSLDecompiler::WorkGroupId<0>,
2634 &GLSLDecompiler::WorkGroupId<1>,
2635 &GLSLDecompiler::WorkGroupId<2>,
2636
2637 &GLSLDecompiler::BallotThread,
2638 &GLSLDecompiler::VoteAll,
2639 &GLSLDecompiler::VoteAny,
2640 &GLSLDecompiler::VoteEqual,
2641
2642 &GLSLDecompiler::ThreadId,
2643 &GLSLDecompiler::ThreadMask<Func::Eq>,
2644 &GLSLDecompiler::ThreadMask<Func::Ge>,
2645 &GLSLDecompiler::ThreadMask<Func::Gt>,
2646 &GLSLDecompiler::ThreadMask<Func::Le>,
2647 &GLSLDecompiler::ThreadMask<Func::Lt>,
2648 &GLSLDecompiler::ShuffleIndexed,
2649
2650 &GLSLDecompiler::Barrier,
2651 &GLSLDecompiler::MemoryBarrierGroup,
2652 &GLSLDecompiler::MemoryBarrierGlobal,
2653 };
2654 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2655
2656 std::string GetRegister(u32 index) const {
2657 return AppendSuffix(index, "gpr");
2658 }
2659
2660 std::string GetCustomVariable(u32 index) const {
2661 return AppendSuffix(index, "custom_var");
2662 }
2663
2664 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2665 return AppendSuffix(static_cast<u32>(pred), "pred");
2666 }
2667
2668 std::string GetGenericInputAttribute(Attribute::Index attribute) const {
2669 return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
2670 }
2671
2672 std::unordered_map<u8, GenericVaryingDescription> varying_description;
2673
2674 std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
2675 const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
2676 const auto& description = varying_description.at(offset);
2677 if (description.is_scalar) {
2678 return description.name;
2679 }
2680 return fmt::format("{}[{}]", description.name, element - description.first_element);
2681 }
2682
2683 std::string GetConstBuffer(u32 index) const {
2684 return AppendSuffix(index, "cbuf");
2685 }
2686
2687 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
2688 return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
2689 }
2690
2691 std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
2692 return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
2693 suffix);
2694 }
2695
2696 std::string GetConstBufferBlock(u32 index) const {
2697 return AppendSuffix(index, "cbuf_block");
2698 }
2699
2700 std::string GetLocalMemory() const {
2701 if (suffix.empty()) {
2702 return "lmem";
2703 } else {
2704 return "lmem_" + std::string{suffix};
2705 }
2706 }
2707
2708 std::string GetInternalFlag(InternalFlag flag) const {
2709 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
2710 "overflow_flag"};
2711 const auto index = static_cast<u32>(flag);
2712 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2713
2714 if (suffix.empty()) {
2715 return InternalFlagNames[index];
2716 } else {
2717 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2718 }
2719 }
2720
2721 std::string GetSampler(const SamplerEntry& sampler) const {
2722 return AppendSuffix(sampler.index, "sampler");
2723 }
2724
2725 std::string GetImage(const ImageEntry& image) const {
2726 return AppendSuffix(image.index, "image");
2727 }
2728
2729 std::string AppendSuffix(u32 index, std::string_view name) const {
2730 if (suffix.empty()) {
2731 return fmt::format("{}{}", name, index);
2732 } else {
2733 return fmt::format("{}{}_{}", name, index, suffix);
2734 }
2735 }
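// Editor's note (illustrative sketch): AppendSuffix(3, "gpr") yields "gpr3" when the suffix is
// empty and "gpr3_vertex" for a hypothetical "vertex" suffix; the same scheme names the samplers,
// images, constant buffers and predicates above.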
2736
2737 u32 GetNumPhysicalInputAttributes() const {
2738 return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
2739 }
2740
2741 u32 GetNumPhysicalAttributes() const {
2742 return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
2743 }
2744
2745 u32 GetNumPhysicalVaryings() const {
2746 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2747 }
2748
2749 const Device& device;
2750 const ShaderIR& ir;
2751 const Registry& registry;
2752 const ShaderType stage;
2753 const std::string_view identifier;
2754 const std::string_view suffix;
2755 const Header header;
2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2757
2758 ShaderWriter code;
2759
2760 std::optional<u32> max_input_vertices;
2761};
2762
2763std::string GetFlowVariable(u32 index) {
2764 return fmt::format("flow_var{}", index);
2765}
2766
2767class ExprDecompiler {
2768public:
2769 explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2770
2771 void operator()(const ExprAnd& expr) {
2772 inner += '(';
2773 std::visit(*this, *expr.operand1);
2774 inner += " && ";
2775 std::visit(*this, *expr.operand2);
2776 inner += ')';
2777 }
2778
2779 void operator()(const ExprOr& expr) {
2780 inner += '(';
2781 std::visit(*this, *expr.operand1);
2782 inner += " || ";
2783 std::visit(*this, *expr.operand2);
2784 inner += ')';
2785 }
2786
2787 void operator()(const ExprNot& expr) {
2788 inner += '!';
2789 std::visit(*this, *expr.operand1);
2790 }
2791
2792 void operator()(const ExprPredicate& expr) {
2793 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2794 inner += decomp.GetPredicate(pred);
2795 }
2796
2797 void operator()(const ExprCondCode& expr) {
2798 inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
2799 }
2800
2801 void operator()(const ExprVar& expr) {
2802 inner += GetFlowVariable(expr.var_index);
2803 }
2804
2805 void operator()(const ExprBoolean& expr) {
2806 inner += expr.value ? "true" : "false";
2807 }
2808
2809 void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
2810 inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
2811 }
2812
2813 const std::string& GetResult() const {
2814 return inner;
2815 }
2816
2817private:
2818 GLSLDecompiler& decomp;
2819 std::string inner;
2820};
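// Editor's note (illustrative sketch): a control-flow condition of the form not(pred 1) AND
// flow variable 0 is rendered by ExprDecompiler as "(!pred1 && flow_var0)" (stage suffix
// omitted); the predicate and variable indices here are hypothetical.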
2821
2822class ASTDecompiler {
2823public:
2824 explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2825
2826 void operator()(const ASTProgram& ast) {
2827 ASTNode current = ast.nodes.GetFirst();
2828 while (current) {
2829 Visit(current);
2830 current = current->GetNext();
2831 }
2832 }
2833
2834 void operator()(const ASTIfThen& ast) {
2835 ExprDecompiler expr_parser{decomp};
2836 std::visit(expr_parser, *ast.condition);
2837 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2838 decomp.code.scope++;
2839 ASTNode current = ast.nodes.GetFirst();
2840 while (current) {
2841 Visit(current);
2842 current = current->GetNext();
2843 }
2844 decomp.code.scope--;
2845 decomp.code.AddLine("}}");
2846 }
2847
2848 void operator()(const ASTIfElse& ast) {
2849 decomp.code.AddLine("else {{");
2850 decomp.code.scope++;
2851 ASTNode current = ast.nodes.GetFirst();
2852 while (current) {
2853 Visit(current);
2854 current = current->GetNext();
2855 }
2856 decomp.code.scope--;
2857 decomp.code.AddLine("}}");
2858 }
2859
2860 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2861 UNREACHABLE();
2862 }
2863
2864 void operator()(const ASTBlockDecoded& ast) {
2865 decomp.VisitBlock(ast.nodes);
2866 }
2867
2868 void operator()(const ASTVarSet& ast) {
2869 ExprDecompiler expr_parser{decomp};
2870 std::visit(expr_parser, *ast.condition);
2871 decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
2872 }
2873
2874 void operator()(const ASTLabel& ast) {
2875 decomp.code.AddLine("// Label_{}:", ast.index);
2876 }
2877
2878 void operator()([[maybe_unused]] const ASTGoto& ast) {
2879 UNREACHABLE();
2880 }
2881
2882 void operator()(const ASTDoWhile& ast) {
2883 ExprDecompiler expr_parser{decomp};
2884 std::visit(expr_parser, *ast.condition);
2885 decomp.code.AddLine("do {{");
2886 decomp.code.scope++;
2887 ASTNode current = ast.nodes.GetFirst();
2888 while (current) {
2889 Visit(current);
2890 current = current->GetNext();
2891 }
2892 decomp.code.scope--;
2893 decomp.code.AddLine("}} while({});", expr_parser.GetResult());
2894 }
2895
2896 void operator()(const ASTReturn& ast) {
2897 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2898 if (!is_true) {
2899 ExprDecompiler expr_parser{decomp};
2900 std::visit(expr_parser, *ast.condition);
2901 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2902 decomp.code.scope++;
2903 }
2904 if (ast.kills) {
2905 decomp.code.AddLine("discard;");
2906 } else {
2907 decomp.PreExit();
2908 decomp.code.AddLine("return;");
2909 }
2910 if (!is_true) {
2911 decomp.code.scope--;
2912 decomp.code.AddLine("}}");
2913 }
2914 }
2915
2916 void operator()(const ASTBreak& ast) {
2917 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2918 if (!is_true) {
2919 ExprDecompiler expr_parser{decomp};
2920 std::visit(expr_parser, *ast.condition);
2921 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2922 decomp.code.scope++;
2923 }
2924 decomp.code.AddLine("break;");
2925 if (!is_true) {
2926 decomp.code.scope--;
2927 decomp.code.AddLine("}}");
2928 }
2929 }
2930
2931 void Visit(const ASTNode& node) {
2932 std::visit(*this, *node->GetInnerData());
2933 }
2934
2935private:
2936 GLSLDecompiler& decomp;
2937};
2938
2939void GLSLDecompiler::DecompileAST() {
2940 const u32 num_flow_variables = ir.GetASTNumVariables();
2941 for (u32 i = 0; i < num_flow_variables; i++) {
2942 code.AddLine("bool {} = false;", GetFlowVariable(i));
2943 }
2944
2945 ASTDecompiler decompiler{*this};
2946 decompiler.Visit(ir.GetASTProgram());
2947}
2948
2949} // Anonymous namespace
2950
2951ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2952 ShaderEntries entries;
2953 for (const auto& cbuf : ir.GetConstantBuffers()) {
2954 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2955 cbuf.first);
2956 }
2957 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2958 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2959 usage.is_written);
2960 }
2961 for (const auto& sampler : ir.GetSamplers()) {
2962 entries.samplers.emplace_back(sampler);
2963 }
2964 for (const auto& image : ir.GetImages()) {
2965 entries.images.emplace_back(image);
2966 }
2967 const auto clip_distances = ir.GetClipDistances();
2968 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
2969 entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
2970 }
2971 for (const auto& buffer : entries.const_buffers) {
2972 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
2973 }
2974 entries.shader_length = ir.GetLength();
2975 return entries;
2976}
2977
2978std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2979 ShaderType stage, std::string_view identifier,
2980 std::string_view suffix) {
2981 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2982 decompiler.Decompile();
2983 return decompiler.GetResult();
2984}
2985
2986} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <vector>
12#include "common/common_types.h"
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace OpenGL {
19
20class Device;
21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::ImageEntry;
25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public:
28 explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
29 : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
30
31 u32 GetIndex() const {
32 return index;
33 }
34
35private:
36 u32 index = 0;
37};
38
39struct GlobalMemoryEntry {
40 constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
41 bool is_written_)
42 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
43 is_written_} {}
44
45 u32 cbuf_index = 0;
46 u32 cbuf_offset = 0;
47 bool is_read = false;
48 bool is_written = false;
49};
50
51struct ShaderEntries {
52 std::vector<ConstBufferEntry> const_buffers;
53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images;
56 std::size_t shader_length{};
57 u32 clip_distances{};
58 u32 enabled_uniform_buffers{};
59};
60
61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
63
64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
65 const VideoCommon::Shader::Registry& registry,
66 Tegra::Engines::ShaderType stage, std::string_view identifier,
67 std::string_view suffix = {});
68
69} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/fs/file.h"
12#include "common/fs/fs.h"
13#include "common/fs/path_util.h"
14#include "common/logging/log.h"
15#include "common/scm_rev.h"
16#include "common/settings.h"
17#include "common/zstd_compression.h"
18#include "core/core.h"
19#include "core/hle/kernel/k_process.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_shader_cache.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
23
24namespace OpenGL {
25
26using Tegra::Engines::ShaderType;
27using VideoCommon::Shader::BindlessSamplerMap;
28using VideoCommon::Shader::BoundSamplerMap;
29using VideoCommon::Shader::KeyMap;
30using VideoCommon::Shader::SeparateSamplerKey;
31using ShaderCacheVersionHash = std::array<u8, 64>;
32
33struct ConstBufferKey {
34 u32 cbuf = 0;
35 u32 offset = 0;
36 u32 value = 0;
37};
38
39struct BoundSamplerEntry {
40 u32 offset = 0;
41 Tegra::Engines::SamplerDescriptor sampler;
42};
43
44struct SeparateSamplerEntry {
45 u32 cbuf1 = 0;
46 u32 cbuf2 = 0;
47 u32 offset1 = 0;
48 u32 offset2 = 0;
49 Tegra::Engines::SamplerDescriptor sampler;
50};
51
52struct BindlessSamplerEntry {
53 u32 cbuf = 0;
54 u32 offset = 0;
55 Tegra::Engines::SamplerDescriptor sampler;
56};
57
58namespace {
59
60constexpr u32 NativeVersion = 21;
61
62ShaderCacheVersionHash GetShaderCacheVersionHash() {
63 ShaderCacheVersionHash hash{};
64 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
65 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
66 return hash;
67}
68
69} // Anonymous namespace
70
71ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
72
73ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
74
75bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
76 if (!file.ReadObject(type)) {
77 return false;
78 }
79 u32 code_size;
80 u32 code_size_b;
81 if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
82 return false;
83 }
84 code.resize(code_size);
85 code_b.resize(code_size_b);
86 if (file.Read(code) != code_size) {
87 return false;
88 }
89 if (HasProgramA() && file.Read(code_b) != code_size_b) {
90 return false;
91 }
92
93 u8 is_texture_handler_size_known;
94 u32 texture_handler_size_value;
95 u32 num_keys;
96 u32 num_bound_samplers;
97 u32 num_separate_samplers;
98 u32 num_bindless_samplers;
99 if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
100 !file.ReadObject(is_texture_handler_size_known) ||
101 !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
102 !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
103 !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
104 !file.ReadObject(num_bindless_samplers)) {
105 return false;
106 }
107 if (is_texture_handler_size_known) {
108 texture_handler_size = texture_handler_size_value;
109 }
110
111 std::vector<ConstBufferKey> flat_keys(num_keys);
112 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
113 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
114 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
115 if (file.Read(flat_keys) != flat_keys.size() ||
116 file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
117 file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
118 file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
119 return false;
120 }
121 for (const auto& entry : flat_keys) {
122 keys.insert({{entry.cbuf, entry.offset}, entry.value});
123 }
124 for (const auto& entry : flat_bound_samplers) {
125 bound_samplers.emplace(entry.offset, entry.sampler);
126 }
127 for (const auto& entry : flat_separate_samplers) {
128 SeparateSamplerKey key;
129 key.buffers = {entry.cbuf1, entry.cbuf2};
130 key.offsets = {entry.offset1, entry.offset2};
131 separate_samplers.emplace(key, entry.sampler);
132 }
133 for (const auto& entry : flat_bindless_samplers) {
134 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
135 }
136
137 return true;
138}
139
140bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
141 if (!file.WriteObject(static_cast<u32>(type)) ||
142 !file.WriteObject(static_cast<u32>(code.size())) ||
143 !file.WriteObject(static_cast<u32>(code_b.size()))) {
144 return false;
145 }
146 if (file.Write(code) != code.size()) {
147 return false;
148 }
149 if (HasProgramA() && file.Write(code_b) != code_b.size()) {
150 return false;
151 }
152
153 if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
154 !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
155 !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
156 !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
157 !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
158 !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
159 !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
160 return false;
161 }
162
163 std::vector<ConstBufferKey> flat_keys;
164 flat_keys.reserve(keys.size());
165 for (const auto& [address, value] : keys) {
166 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
167 }
168
169 std::vector<BoundSamplerEntry> flat_bound_samplers;
170 flat_bound_samplers.reserve(bound_samplers.size());
171 for (const auto& [address, sampler] : bound_samplers) {
172 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
173 }
174
175 std::vector<SeparateSamplerEntry> flat_separate_samplers;
176 flat_separate_samplers.reserve(separate_samplers.size());
177 for (const auto& [key, sampler] : separate_samplers) {
178 SeparateSamplerEntry entry;
179 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
180 std::tie(entry.offset1, entry.offset2) = key.offsets;
181 entry.sampler = sampler;
182 flat_separate_samplers.push_back(entry);
183 }
184
185 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
186 flat_bindless_samplers.reserve(bindless_samplers.size());
187 for (const auto& [address, sampler] : bindless_samplers) {
188 flat_bindless_samplers.push_back(
189 BindlessSamplerEntry{address.first, address.second, sampler});
190 }
191
192 return file.Write(flat_keys) == flat_keys.size() &&
193 file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
194 file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
195 file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
196}
197
198ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
199
200ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
201
202void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
203 title_id = title_id_;
204}
205
206std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
207 // Skip games without title id
208 const bool has_title_id = title_id != 0;
209 if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
210 return std::nullopt;
211 }
212
213 Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
214 Common::FS::FileType::BinaryFile};
215 if (!file.IsOpen()) {
216 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
217 is_usable = true;
218 return std::nullopt;
219 }
220
221 u32 version{};
222 if (!file.ReadObject(version)) {
223 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
224 return std::nullopt;
225 }
226
227 if (version < NativeVersion) {
228 LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
229 file.Close();
230 InvalidateTransferable();
231 is_usable = true;
232 return std::nullopt;
233 }
234 if (version > NativeVersion) {
235 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
236 "of the emulator, skipping");
237 return std::nullopt;
238 }
239
240 // Version is valid, load the shaders
241 std::vector<ShaderDiskCacheEntry> entries;
242 while (static_cast<u64>(file.Tell()) < file.GetSize()) {
243 ShaderDiskCacheEntry& entry = entries.emplace_back();
244 if (!entry.Load(file)) {
245 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
246 return std::nullopt;
247 }
248 }
249
250 is_usable = true;
251 return {std::move(entries)};
252}
253
254std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
255 if (!is_usable) {
256 return {};
257 }
258
259 Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
260 Common::FS::FileType::BinaryFile};
261 if (!file.IsOpen()) {
262 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
263 return {};
264 }
265
266 if (const auto result = LoadPrecompiledFile(file)) {
267 return *result;
268 }
269
270 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
271 file.Close();
272 InvalidatePrecompiled();
273 return {};
274}
275
276std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
277 Common::FS::IOFile& file) {
278 // Read compressed file from disk and decompress to virtual precompiled cache file
279 std::vector<u8> compressed(file.GetSize());
280 if (file.Read(compressed) != file.GetSize()) {
281 return std::nullopt;
282 }
283 const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
284 SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
285 precompiled_cache_virtual_file_offset = 0;
286
287 ShaderCacheVersionHash file_hash{};
288 if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
289 precompiled_cache_virtual_file_offset = 0;
290 return std::nullopt;
291 }
292 if (GetShaderCacheVersionHash() != file_hash) {
293 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
294 precompiled_cache_virtual_file_offset = 0;
295 return std::nullopt;
296 }
297
298 std::vector<ShaderDiskCachePrecompiled> entries;
299 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
300 u32 binary_size;
301 auto& entry = entries.emplace_back();
302 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
303 !LoadObjectFromPrecompiled(entry.binary_format) ||
304 !LoadObjectFromPrecompiled(binary_size)) {
305 return std::nullopt;
306 }
307
308 entry.binary.resize(binary_size);
309 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
310 return std::nullopt;
311 }
312 }
313 return entries;
314}
315
316void ShaderDiskCacheOpenGL::InvalidateTransferable() {
317 if (!Common::FS::RemoveFile(GetTransferablePath())) {
318 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
319 Common::FS::PathToUTF8String(GetTransferablePath()));
320 }
321 InvalidatePrecompiled();
322}
323
324void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
325 // Clear virtual precompiled cache file
326 precompiled_cache_virtual_file.Resize(0);
327
328 if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
329 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
330 Common::FS::PathToUTF8String(GetPrecompiledPath()));
331 }
332}
333
334void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
335 if (!is_usable) {
336 return;
337 }
338
339 const u64 id = entry.unique_identifier;
340 if (stored_transferable.contains(id)) {
341 // The shader already exists
342 return;
343 }
344
345 Common::FS::IOFile file = AppendTransferableFile();
346 if (!file.IsOpen()) {
347 return;
348 }
349 if (!entry.Save(file)) {
350 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
351 file.Close();
352 InvalidateTransferable();
353 return;
354 }
355
356 stored_transferable.insert(id);
357}
358
359void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
360 if (!is_usable) {
361 return;
362 }
363
364 // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
365 // when writing the dump. This should be done the moment I get access to write to the virtual
366 // file.
367 if (precompiled_cache_virtual_file.GetSize() == 0) {
368 SavePrecompiledHeaderToVirtualPrecompiledCache();
369 }
370
371 GLint binary_length;
372 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
373
374 GLenum binary_format;
375 std::vector<u8> binary(binary_length);
376 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
377
378 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
379 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
380 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
381 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
382 unique_identifier);
383 InvalidatePrecompiled();
384 }
385}
386
387Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
388 if (!EnsureDirectories()) {
389 return {};
390 }
391
392 const auto transferable_path{GetTransferablePath()};
393 const bool existed = Common::FS::Exists(transferable_path);
394
395 Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
396 Common::FS::FileType::BinaryFile};
397 if (!file.IsOpen()) {
398 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
399 Common::FS::PathToUTF8String(transferable_path));
400 return {};
401 }
402 if (!existed || file.GetSize() == 0) {
403 // If the file didn't exist, write its version
404 if (!file.WriteObject(NativeVersion)) {
405 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
406 Common::FS::PathToUTF8String(transferable_path));
407 return {};
408 }
409 }
410 return file;
411}
412
413void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
414 const auto hash{GetShaderCacheVersionHash()};
415 if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
416 LOG_ERROR(
417 Render_OpenGL,
418 "Failed to write precompiled cache version hash to virtual precompiled cache file");
419 }
420}
421
422void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
423 precompiled_cache_virtual_file_offset = 0;
424 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
425 const std::vector<u8> compressed =
426 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
427
428 const auto precompiled_path = GetPrecompiledPath();
429 Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
430 Common::FS::FileType::BinaryFile};
431
432 if (!file.IsOpen()) {
433 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
434 Common::FS::PathToUTF8String(precompiled_path));
435 return;
436 }
437 if (file.Write(compressed) != compressed.size()) {
438 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
439 Common::FS::PathToUTF8String(precompiled_path));
440 }
441}
442
443bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
444 const auto CreateDir = [](const std::filesystem::path& dir) {
445 if (!Common::FS::CreateDir(dir)) {
446 LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
447 Common::FS::PathToUTF8String(dir));
448 return false;
449 }
450 return true;
451 };
452
453 return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
454 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
455 CreateDir(GetPrecompiledDir());
456}
457
458std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
459 return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
460}
461
462std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
463 return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
464}
465
466std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
467 return GetBaseDir() / "transferable";
468}
469
470std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
471 return GetBaseDir() / "precompiled";
472}
473
474std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
475 return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
476}
477
478std::string ShaderDiskCacheOpenGL::GetTitleID() const {
479 return fmt::format("{:016X}", title_id);
480}
481
482} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <filesystem>
8#include <optional>
9#include <string>
10#include <tuple>
11#include <type_traits>
12#include <unordered_map>
13#include <unordered_set>
14#include <utility>
15#include <vector>
16
17#include <glad/glad.h>
18
19#include "common/assert.h"
20#include "common/common_types.h"
21#include "core/file_sys/vfs_vector.h"
22#include "video_core/engines/shader_type.h"
23#include "video_core/shader/registry.h"
24
25namespace Common::FS {
26class IOFile;
27}
28
29namespace OpenGL {
30
31using ProgramCode = std::vector<u64>;
32
33/// Describes a shader and how it's used by the guest GPU
34struct ShaderDiskCacheEntry {
35 ShaderDiskCacheEntry();
36 ~ShaderDiskCacheEntry();
37
38 bool Load(Common::FS::IOFile& file);
39
40 bool Save(Common::FS::IOFile& file) const;
41
42 bool HasProgramA() const {
43 return !code.empty() && !code_b.empty();
44 }
45
46 Tegra::Engines::ShaderType type{};
47 ProgramCode code;
48 ProgramCode code_b;
49
50 u64 unique_identifier = 0;
51 std::optional<u32> texture_handler_size;
52 u32 bound_buffer = 0;
53 VideoCommon::Shader::GraphicsInfo graphics_info;
54 VideoCommon::Shader::ComputeInfo compute_info;
55 VideoCommon::Shader::KeyMap keys;
56 VideoCommon::Shader::BoundSamplerMap bound_samplers;
57 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
58 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
59};
60
61/// Contains an OpenGL dumped binary program
62struct ShaderDiskCachePrecompiled {
63 u64 unique_identifier = 0;
64 GLenum binary_format = 0;
65 std::vector<u8> binary;
66};
67
68class ShaderDiskCacheOpenGL {
69public:
70 explicit ShaderDiskCacheOpenGL();
71 ~ShaderDiskCacheOpenGL();
72
73 /// Binds a title ID for all future operations.
74 void BindTitleID(u64 title_id);
75
76 /// Loads the transferable cache. If the file has an old version or loading fails, it deletes the file.
77 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
78
79 /// Loads current game's precompiled cache. Invalidates on failure.
80 std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
81
82 /// Removes the transferable (and precompiled) cache file.
83 void InvalidateTransferable();
84
85 /// Removes the precompiled cache file and clears virtual precompiled cache file.
86 void InvalidatePrecompiled();
87
88 /// Saves a raw dump to the transferable file. Checks for collisions.
89 void SaveEntry(const ShaderDiskCacheEntry& entry);
90
91 /// Saves a dump entry to the precompiled file. Does not check for collisions.
92 void SavePrecompiled(u64 unique_identifier, GLuint program);
93
94 /// Serializes the virtual precompiled shader cache file to a real file
95 void SaveVirtualPrecompiledFile();
96
97private:
98 /// Loads the precompiled cache file. Returns empty on failure.
99 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
100 Common::FS::IOFile& file);
101
102 /// Opens the current game's transferable file and writes its header if it doesn't exist
103 Common::FS::IOFile AppendTransferableFile() const;
104
105 /// Saves the precompiled header to the virtual precompiled cache file
106 void SavePrecompiledHeaderToVirtualPrecompiledCache();
107
108 /// Create shader disk cache directories. Returns true on success.
109 bool EnsureDirectories() const;
110
111 /// Gets current game's transferable file path
112 std::filesystem::path GetTransferablePath() const;
113
114 /// Gets current game's precompiled file path
115 std::filesystem::path GetPrecompiledPath() const;
116
117 /// Get user's transferable directory path
118 std::filesystem::path GetTransferableDir() const;
119
120 /// Get user's precompiled directory path
121 std::filesystem::path GetPrecompiledDir() const;
122
123 /// Get user's shader directory path
124 std::filesystem::path GetBaseDir() const;
125
126 /// Get current game's title id
127 std::string GetTitleID() const;
128
129 template <typename T>
130 bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
131 const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
132 data, length, precompiled_cache_virtual_file_offset);
133 precompiled_cache_virtual_file_offset += write_length;
134 return write_length == sizeof(T) * length;
135 }
136
137 template <typename T>
138 bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
139 const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
140 data, length, precompiled_cache_virtual_file_offset);
141 precompiled_cache_virtual_file_offset += read_length;
142 return read_length == sizeof(T) * length;
143 }
144
145 template <typename T>
146 bool SaveObjectToPrecompiled(const T& object) {
147 return SaveArrayToPrecompiled(&object, 1);
148 }
149
150 bool SaveObjectToPrecompiled(bool object) {
151 const auto value = static_cast<u8>(object);
152 return SaveArrayToPrecompiled(&value, 1);
153 }
154
155 template <typename T>
156 bool LoadObjectFromPrecompiled(T& object) {
157 return LoadArrayFromPrecompiled(&object, 1);
158 }
159
160 // Stores the whole precompiled cache which will be read from or saved to the precompiled cache
161 // file
162 FileSys::VectorVfsFile precompiled_cache_virtual_file;
163 // Stores the current offset of the precompiled cache file for IO purposes
164 std::size_t precompiled_cache_virtual_file_offset = 0;
165
166 // Stored transferable shaders
167 std::unordered_set<u64> stored_transferable;
168
169 /// Title ID to operate on
170 u64 title_id = 0;
171
172 // The cache has been loaded at boot
173 bool is_usable = false;
174};
175
176} // namespace OpenGL
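For reference, a minimal sketch of how the removed ShaderDiskCacheOpenGL interface above was typically driven. The names title_id, BuildShader, new_entry and program_handle are placeholders for the caller's own state, not code from this commit.

// Hedged usage sketch of the deleted API; placeholder identifiers are noted inline.
ShaderDiskCacheOpenGL disk_cache;
disk_cache.BindTitleID(title_id);  // title_id: placeholder, the running game's title ID

// Load the transferable (raw) entries first, then any precompiled GL binaries.
if (const auto transferable = disk_cache.LoadTransferable()) {
    const auto precompiled = disk_cache.LoadPrecompiled();
    for (const ShaderDiskCacheEntry& entry : *transferable) {
        BuildShader(entry, precompiled);  // BuildShader: placeholder for the caller's rebuild step
    }
}

// At runtime, newly seen shaders are appended to the transferable file and, once a
// GL program binary exists, dumped into the virtual precompiled cache.
disk_cache.SaveEntry(new_entry);                                          // new_entry: placeholder entry
disk_cache.SavePrecompiled(new_entry.unique_identifier, program_handle);  // program_handle: placeholder GLuint
disk_cache.SaveVirtualPrecompiledFile();  // flushes the compressed precompiled file to disk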
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_opengl/gl_device.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11
12namespace OpenGL {
13
14namespace {
15
16void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
17 if (current == old) {
18 return;
19 }
20 if (current == 0) {
21 if (enabled) {
22 enabled = false;
23 glDisable(stage);
24 }
25 return;
26 }
27 if (!enabled) {
28 enabled = true;
29 glEnable(stage);
30 }
31 glBindProgramARB(stage, current);
32}
33
34} // Anonymous namespace
35
36ProgramManager::ProgramManager(const Device& device)
37 : use_assembly_programs{device.UseAssemblyShaders()} {
38 if (use_assembly_programs) {
39 glEnable(GL_COMPUTE_PROGRAM_NV);
40 } else {
41 graphics_pipeline.Create();
42 glBindProgramPipeline(graphics_pipeline.handle);
43 }
44}
45
46ProgramManager::~ProgramManager() = default;
47
48void ProgramManager::BindCompute(GLuint program) {
49 if (use_assembly_programs) {
50 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
51 } else {
52 is_graphics_bound = false;
53 glUseProgram(program);
54 }
55}
56
57void ProgramManager::BindGraphicsPipeline() {
58 if (!use_assembly_programs) {
59 UpdateSourcePrograms();
60 }
61}
62
63void ProgramManager::BindHostPipeline(GLuint pipeline) {
64 if (use_assembly_programs) {
65 if (geometry_enabled) {
66 geometry_enabled = false;
67 old_state.geometry = 0;
68 glDisable(GL_GEOMETRY_PROGRAM_NV);
69 }
70 } else {
71 if (!is_graphics_bound) {
72 glUseProgram(0);
73 }
74 }
75 glBindProgramPipeline(pipeline);
76}
77
78void ProgramManager::RestoreGuestPipeline() {
79 if (use_assembly_programs) {
80 glBindProgramPipeline(0);
81 } else {
82 glBindProgramPipeline(graphics_pipeline.handle);
83 }
84}
85
86void ProgramManager::BindHostCompute(GLuint program) {
87 if (use_assembly_programs) {
88 glDisable(GL_COMPUTE_PROGRAM_NV);
89 }
90 glUseProgram(program);
91 is_graphics_bound = false;
92}
93
94void ProgramManager::RestoreGuestCompute() {
95 if (use_assembly_programs) {
96 glEnable(GL_COMPUTE_PROGRAM_NV);
97 glUseProgram(0);
98 }
99}
100
101void ProgramManager::UseVertexShader(GLuint program) {
102 if (use_assembly_programs) {
103 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
104 }
105 current_state.vertex = program;
106}
107
108void ProgramManager::UseGeometryShader(GLuint program) {
109 if (use_assembly_programs) {
110 BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
111 }
112 current_state.geometry = program;
113}
114
115void ProgramManager::UseFragmentShader(GLuint program) {
116 if (use_assembly_programs) {
117 BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
118 }
119 current_state.fragment = program;
120}
121
122void ProgramManager::UpdateSourcePrograms() {
123 if (!is_graphics_bound) {
124 is_graphics_bound = true;
125 glUseProgram(0);
126 }
127
128 const GLuint handle = graphics_pipeline.handle;
129 const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
130 if (current == old) {
131 return;
132 }
133 glUseProgramStages(handle, stage, current);
134 };
135 update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
136 update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
137 update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
138
139 old_state = current_state;
140}
141
142void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
143 const auto& regs = maxwell.regs;
144
145 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
146 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
147}
148
149} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..d7ef0775d 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,142 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef> 7#include <array>
8#include <span>
8 9
9#include <glad/glad.h> 10#include <glad/glad.h>
10 11
12#include "video_core/renderer_opengl/gl_device.h"
11#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/maxwell_to_gl.h"
13 14
14namespace OpenGL { 15namespace OpenGL {
15 16
16class Device;
17
18/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
19/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
20/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
21/// Not following that rule will cause problems on some AMD drivers.
22struct alignas(16) MaxwellUniformData {
23 void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
24
25 GLfloat y_direction;
26};
27static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
28static_assert(sizeof(MaxwellUniformData) < 16384,
29 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
30
31class ProgramManager { 17class ProgramManager {
32public: 18 static constexpr size_t NUM_STAGES = 5;
33 explicit ProgramManager(const Device& device);
34 ~ProgramManager();
35
36 /// Binds a compute program
37 void BindCompute(GLuint program);
38
39 /// Updates bound programs.
40 void BindGraphicsPipeline();
41
42 /// Binds an OpenGL pipeline object unsynchronized with the guest state.
43 void BindHostPipeline(GLuint pipeline);
44
45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline();
47
48 /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
49 void BindHostCompute(GLuint program);
50 19
51 /// Rewinds BindHostCompute state changes. 20 static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
52 void RestoreGuestCompute(); 21 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
53 22 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
54 void UseVertexShader(GLuint program);
55 void UseGeometryShader(GLuint program);
56 void UseFragmentShader(GLuint program);
57
58private:
59 struct PipelineState {
60 GLuint vertex = 0;
61 GLuint geometry = 0;
62 GLuint fragment = 0;
63 }; 23 };
64 24
65 /// Update GLSL programs. 25public:
66 void UpdateSourcePrograms(); 26 explicit ProgramManager(const Device& device) {
67 27 glCreateProgramPipelines(1, &pipeline.handle);
68 OGLPipeline graphics_pipeline; 28 if (device.UseAssemblyShaders()) {
69 29 glEnable(GL_COMPUTE_PROGRAM_NV);
70 PipelineState current_state; 30 }
71 PipelineState old_state; 31 }
72 32
73 bool use_assembly_programs = false; 33 void BindComputeProgram(GLuint program) {
74 34 glUseProgram(program);
75 bool is_graphics_bound = true; 35 is_compute_bound = true;
36 }
37
38 void BindComputeAssemblyProgram(GLuint program) {
39 if (current_assembly_compute_program != program) {
40 current_assembly_compute_program = program;
41 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
42 }
43 UnbindPipeline();
44 }
45
46 void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
47 static constexpr std::array<GLenum, 5> stage_enums{
48 GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
49 GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
50 };
51 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
52 if (current_programs[stage] != programs[stage].handle) {
53 current_programs[stage] = programs[stage].handle;
54 glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
55 }
56 }
57 BindPipeline();
58 }
59
60 void BindPresentPrograms(GLuint vertex, GLuint fragment) {
61 if (current_programs[0] != vertex) {
62 current_programs[0] = vertex;
63 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
64 }
65 if (current_programs[4] != fragment) {
66 current_programs[4] = fragment;
67 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
68 }
69 glUseProgramStages(
70 pipeline.handle,
71 GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
72 current_programs[1] = 0;
73 current_programs[2] = 0;
74 current_programs[3] = 0;
75
76 if (current_stage_mask != 0) {
77 current_stage_mask = 0;
78 for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
79 glDisable(program_type);
80 }
81 }
82 BindPipeline();
83 }
84
85 void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
86 u32 stage_mask) {
87 const u32 changed_mask = current_stage_mask ^ stage_mask;
88 current_stage_mask = stage_mask;
89
90 if (changed_mask != 0) {
91 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
92 if (((changed_mask >> stage) & 1) != 0) {
93 if (((stage_mask >> stage) & 1) != 0) {
94 glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
95 } else {
96 glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
97 }
98 }
99 }
100 }
101 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
102 if (current_programs[stage] != programs[stage].handle) {
103 current_programs[stage] = programs[stage].handle;
104 glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
105 }
106 }
107 UnbindPipeline();
108 }
109
110 void RestoreGuestCompute() {}
76 111
77 bool vertex_enabled = false; 112private:
78 bool geometry_enabled = false; 113 void BindPipeline() {
79 bool fragment_enabled = false; 114 if (!is_pipeline_bound) {
115 is_pipeline_bound = true;
116 glBindProgramPipeline(pipeline.handle);
117 }
118 UnbindCompute();
119 }
120
121 void UnbindPipeline() {
122 if (is_pipeline_bound) {
123 is_pipeline_bound = false;
124 glBindProgramPipeline(0);
125 }
126 UnbindCompute();
127 }
128
129 void UnbindCompute() {
130 if (is_compute_bound) {
131 is_compute_bound = false;
132 glUseProgram(0);
133 }
134 }
135
136 OGLPipeline pipeline;
137 bool is_pipeline_bound{};
138 bool is_compute_bound{};
139
140 u32 current_stage_mask = 0;
141 std::array<GLuint, NUM_STAGES> current_programs{};
142 GLuint current_assembly_compute_program = 0;
80}; 143};
81 144
82} // namespace OpenGL 145} // namespace OpenGL
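A brief sketch of how the reworked ProgramManager above might be fed by a pipeline object; device, stage_mask and the two program arrays are assumptions standing in for the caller's state, not code from this commit.

// Hedged sketch: choosing between the assembly (NV_gpu_program) path and the
// separable-GLSL path exposed by the new ProgramManager.
void BindPipelinePrograms(ProgramManager& program_manager, const Device& device,
                          std::span<const OGLAssemblyProgram, 5> assembly_programs,
                          std::span<const OGLProgram, 5> source_programs, u32 stage_mask) {
    if (device.UseAssemblyShaders()) {
        // stage_mask carries one bit per shader stage present in the pipeline.
        program_manager.BindAssemblyPrograms(assembly_programs, stage_mask);
    } else {
        program_manager.BindSourcePrograms(source_programs);
    }
}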
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4bf0d6090..d432072ad 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -5,57 +5,108 @@
5#include <string_view> 5#include <string_view>
6#include <vector> 6#include <vector>
7#include <glad/glad.h> 7#include <glad/glad.h>
8
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/settings.h"
10#include "video_core/renderer_opengl/gl_shader_util.h" 12#include "video_core/renderer_opengl/gl_shader_util.h"
11 13
12namespace OpenGL::GLShader { 14namespace OpenGL {
13 15
14namespace { 16static OGLProgram LinkSeparableProgram(GLuint shader) {
17 OGLProgram program;
18 program.handle = glCreateProgram();
19 glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
20 glAttachShader(program.handle, shader);
21 glLinkProgram(program.handle);
22 if (!Settings::values.renderer_debug) {
23 return program;
24 }
25 GLint link_status{};
26 glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
15 27
16std::string_view StageDebugName(GLenum type) { 28 GLint log_length{};
17 switch (type) { 29 glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
18 case GL_VERTEX_SHADER: 30 if (log_length == 0) {
19 return "vertex"; 31 return program;
20 case GL_GEOMETRY_SHADER: 32 }
21 return "geometry"; 33 std::string log(log_length, 0);
22 case GL_FRAGMENT_SHADER: 34 glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
23 return "fragment"; 35 if (link_status == GL_FALSE) {
24 case GL_COMPUTE_SHADER: 36 LOG_ERROR(Render_OpenGL, "{}", log);
25 return "compute"; 37 } else {
38 LOG_WARNING(Render_OpenGL, "{}", log);
26 } 39 }
27 UNIMPLEMENTED(); 40 return program;
28 return "unknown";
29} 41}
30 42
31} // Anonymous namespace 43static void LogShader(GLuint shader, std::string_view code = {}) {
44 GLint shader_status{};
45 glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
46 if (shader_status == GL_FALSE) {
47 LOG_ERROR(Render_OpenGL, "Failed to build shader");
48 }
49 GLint log_length{};
50 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
51 if (log_length == 0) {
52 return;
53 }
54 std::string log(log_length, 0);
55 glGetShaderInfoLog(shader, log_length, nullptr, log.data());
56 if (shader_status == GL_FALSE) {
57 LOG_ERROR(Render_OpenGL, "{}", log);
58 if (!code.empty()) {
59 LOG_INFO(Render_OpenGL, "\n{}", code);
60 }
61 } else {
62 LOG_WARNING(Render_OpenGL, "{}", log);
63 }
64}
32 65
33GLuint LoadShader(std::string_view source, GLenum type) { 66OGLProgram CreateProgram(std::string_view code, GLenum stage) {
34 const std::string_view debug_type = StageDebugName(type); 67 OGLShader shader;
35 const GLuint shader_id = glCreateShader(type); 68 shader.handle = glCreateShader(stage);
36 69
37 const GLchar* source_string = source.data(); 70 const GLint length = static_cast<GLint>(code.size());
38 const GLint source_length = static_cast<GLint>(source.size()); 71 const GLchar* const code_ptr = code.data();
72 glShaderSource(shader.handle, 1, &code_ptr, &length);
73 glCompileShader(shader.handle);
74 if (Settings::values.renderer_debug) {
75 LogShader(shader.handle, code);
76 }
77 return LinkSeparableProgram(shader.handle);
78}
39 79
40 glShaderSource(shader_id, 1, &source_string, &source_length); 80OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
41 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); 81 OGLShader shader;
42 glCompileShader(shader_id); 82 shader.handle = glCreateShader(stage);
43 83
44 GLint result = GL_FALSE; 84 glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
45 GLint info_log_length; 85 static_cast<GLsizei>(code.size_bytes()));
46 glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); 86 glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
47 glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); 87 if (Settings::values.renderer_debug) {
88 LogShader(shader.handle);
89 }
90 return LinkSeparableProgram(shader.handle);
91}
48 92
49 if (info_log_length > 1) { 93OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
50 std::string shader_error(info_log_length, ' '); 94 OGLAssemblyProgram program;
51 glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); 95 glGenProgramsARB(1, &program.handle);
52 if (result == GL_TRUE) { 96 glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
53 LOG_DEBUG(Render_OpenGL, "{}", shader_error); 97 static_cast<GLsizei>(code.size()), code.data());
54 } else { 98 if (Settings::values.renderer_debug) {
55 LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); 99 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
100 if (err && *err) {
101 if (std::strstr(err, "error")) {
102 LOG_CRITICAL(Render_OpenGL, "\n{}", err);
103 LOG_INFO(Render_OpenGL, "\n{}", code);
104 } else {
105 LOG_WARNING(Render_OpenGL, "\n{}", err);
106 }
56 } 107 }
57 } 108 }
58 return shader_id; 109 return program;
59} 110}
60 111
61} // namespace OpenGL::GLShader 112} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 1b770532e..4e1a2a8e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -4,92 +4,23 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
7#include <string> 8#include <string>
9#include <string_view>
8#include <vector> 10#include <vector>
11
9#include <glad/glad.h> 12#include <glad/glad.h>
13
10#include "common/assert.h" 14#include "common/assert.h"
11#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
12 17
13namespace OpenGL::GLShader { 18namespace OpenGL {
14
15/**
16 * Utility function to log the source code of a list of shaders.
17 * @param shaders The OpenGL shaders whose source we will print.
18 */
19template <typename... T>
20void LogShaderSource(T... shaders) {
21 auto shader_list = {shaders...};
22
23 for (const auto& shader : shader_list) {
24 if (shader == 0)
25 continue;
26
27 GLint source_length;
28 glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
29
30 std::string source(source_length, ' ');
31 glGetShaderSource(shader, source_length, nullptr, &source[0]);
32 LOG_INFO(Render_OpenGL, "Shader source {}", source);
33 }
34}
35
36/**
37 * Utility function to create and compile an OpenGL GLSL shader
38 * @param source String of the GLSL shader program
39 * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
40 */
41GLuint LoadShader(std::string_view source, GLenum type);
42
43/**
44 * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
45 * @param separable_program whether to create a separable program
46 * @param shaders ID of shaders to attach to the program
47 * @returns Handle of the newly created OpenGL program object
48 */
49template <typename... T>
50GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
51 // Link the program
52 LOG_DEBUG(Render_OpenGL, "Linking program...");
53
54 GLuint program_id = glCreateProgram();
55
56 ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
57
58 if (separable_program) {
59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
60 }
61 if (hint_retrievable) {
62 glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
63 }
64
65 glLinkProgram(program_id);
66
67 // Check the program
68 GLint result = GL_FALSE;
69 GLint info_log_length;
70 glGetProgramiv(program_id, GL_LINK_STATUS, &result);
71 glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
72
73 if (info_log_length > 1) {
74 std::string program_error(info_log_length, ' ');
75 glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
76 if (result == GL_TRUE) {
77 LOG_DEBUG(Render_OpenGL, "{}", program_error);
78 } else {
79 LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
80 }
81 }
82
83 if (result == GL_FALSE) {
84 // There was a problem linking the shader, print the source for debugging purposes.
85 LogShaderSource(shaders...);
86 }
87 19
88 ASSERT_MSG(result == GL_TRUE, "Shader not linked"); 20OGLProgram CreateProgram(std::string_view code, GLenum stage);
89 21
90 ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); 22OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
91 23
92 return program_id; 24OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
93}
94 25
95} // namespace OpenGL::GLShader 26} // namespace OpenGL
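A small sketch of the new gl_shader_util helpers declared above; glsl_source, spirv_words and glasm_code are placeholder inputs, not values from this commit.

// Hedged sketch: the three program-creation paths.
OGLProgram vert = CreateProgram(glsl_source, GL_VERTEX_SHADER);        // GLSL source -> separable program
OGLProgram frag = CreateProgram(spirv_words, GL_FRAGMENT_SHADER);      // SPIR-V words (std::span<const u32>)
OGLAssemblyProgram frag_asm = CompileProgram(glasm_code, GL_FRAGMENT_PROGRAM_NV);  // NV assembly (GLASM)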
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index dbdf5230f..586da84e3 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
83 FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); 83 FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
84} 84}
85 85
86void SetupDirtyShaders(Tables& tables) {
87 FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
88 Shaders);
89}
90
91void SetupDirtyPolygonModes(Tables& tables) { 86void SetupDirtyPolygonModes(Tables& tables) {
92 tables[0][OFF(polygon_mode_front)] = PolygonModeFront; 87 tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
93 tables[0][OFF(polygon_mode_back)] = PolygonModeBack; 88 tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
217 SetupDirtyScissors(tables); 212 SetupDirtyScissors(tables);
218 SetupDirtyVertexInstances(tables); 213 SetupDirtyVertexInstances(tables);
219 SetupDirtyVertexFormat(tables); 214 SetupDirtyVertexFormat(tables);
220 SetupDirtyShaders(tables);
221 SetupDirtyPolygonModes(tables); 215 SetupDirtyPolygonModes(tables);
222 SetupDirtyDepthTest(tables); 216 SetupDirtyDepthTest(tables);
223 SetupDirtyStencilTest(tables); 217 SetupDirtyStencilTest(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 94c905116..5864c7c07 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -52,7 +52,6 @@ enum : u8 {
52 BlendState0, 52 BlendState0,
53 BlendState7 = BlendState0 + 7, 53 BlendState7 = BlendState0 + 7,
54 54
55 Shaders,
56 ClipDistances, 55 ClipDistances,
57 56
58 PolygonModes, 57 PolygonModes,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index ff0f03e99..c373c9cb4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
24#include "video_core/textures/decoders.h" 24#include "video_core/textures/decoders.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27
28namespace { 27namespace {
29
30using Tegra::Texture::SwizzleSource; 28using Tegra::Texture::SwizzleSource;
31using Tegra::Texture::TextureMipmapFilter; 29using Tegra::Texture::TextureMipmapFilter;
32using Tegra::Texture::TextureType; 30using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
59 GLsizei depth; 57 GLsizei depth;
60}; 58};
61 59
62struct FormatTuple {
63 GLenum internal_format;
64 GLenum format = GL_NONE;
65 GLenum type = GL_NONE;
66};
67
68constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
69 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
70 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
71 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
72 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
73 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
74 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
75 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
76 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
77 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
78 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
79 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
80 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
81 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
82 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
83 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
84 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
85 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
86 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
87 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
88 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
89 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
90 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
91 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
92 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
93 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
94 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
95 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
96 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
97 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
98 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
99 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
100 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
101 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
102 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
103 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
104 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
105 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
106 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
107 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
108 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
109 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
110 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
111 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
112 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
113 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
114 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
115 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
116 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
117 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
118 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
119 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
120 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
121 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
122 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
123 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
124 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
125 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
126 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
127 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
128 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
129 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
130 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
131 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
132 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
133 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
134 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
135 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
136 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
137 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
138 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
139 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
140 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
141 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
142 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
143 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
144 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
145 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
146 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
147 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
148 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
149 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
150 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
151 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
152 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
153 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
154 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
155 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
156 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
157 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
158 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
159 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
160 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
161}};
162
163constexpr std::array ACCELERATED_FORMATS{ 60constexpr std::array ACCELERATED_FORMATS{
164 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, 61 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
165 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, 62 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
170 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, 67 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
171}; 68};
172 69
173const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
174 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
175 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
176}
177
178GLenum ImageTarget(const VideoCommon::ImageInfo& info) { 70GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
179 switch (info.type) { 71 switch (info.type) {
180 case ImageType::e1D: 72 case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
195 return GL_NONE; 87 return GL_NONE;
196} 88}
197 89
198GLenum ImageTarget(ImageViewType type, int num_samples = 1) { 90GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
199 const bool is_multisampled = num_samples > 1; 91 const bool is_multisampled = num_samples > 1;
200 switch (type) { 92 switch (type) {
201 case ImageViewType::e1D: 93 case Shader::TextureType::Color1D:
202 return GL_TEXTURE_1D; 94 return GL_TEXTURE_1D;
203 case ImageViewType::e2D: 95 case Shader::TextureType::Color2D:
204 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; 96 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
205 case ImageViewType::Cube: 97 case Shader::TextureType::ColorCube:
206 return GL_TEXTURE_CUBE_MAP; 98 return GL_TEXTURE_CUBE_MAP;
207 case ImageViewType::e3D: 99 case Shader::TextureType::Color3D:
208 return GL_TEXTURE_3D; 100 return GL_TEXTURE_3D;
209 case ImageViewType::e1DArray: 101 case Shader::TextureType::ColorArray1D:
210 return GL_TEXTURE_1D_ARRAY; 102 return GL_TEXTURE_1D_ARRAY;
211 case ImageViewType::e2DArray: 103 case Shader::TextureType::ColorArray2D:
212 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; 104 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
213 case ImageViewType::CubeArray: 105 case Shader::TextureType::ColorArrayCube:
214 return GL_TEXTURE_CUBE_MAP_ARRAY; 106 return GL_TEXTURE_CUBE_MAP_ARRAY;
215 case ImageViewType::Rect: 107 case Shader::TextureType::Buffer:
216 return GL_TEXTURE_RECTANGLE;
217 case ImageViewType::Buffer:
218 return GL_TEXTURE_BUFFER; 108 return GL_TEXTURE_BUFFER;
219 } 109 }
220 UNREACHABLE_MSG("Invalid image view type={}", type); 110 UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
322 default: 212 default:
323 return false; 213 return false;
324 } 214 }
325 const GLenum internal_format = GetFormatTuple(info.format).internal_format; 215 const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
326 const auto& format_info = runtime.FormatInfo(info.type, internal_format); 216 const auto& format_info = runtime.FormatInfo(info.type, internal_format);
327 if (format_info.is_compressed) { 217 if (format_info.is_compressed) {
328 return false; 218 return false;
@@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
414 304
415void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { 305void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
416 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { 306 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
417 const GLuint texture = image_view->DefaultHandle(); 307 glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
418 glNamedFramebufferTexture(fbo, attachment, texture, 0);
419 return; 308 return;
420 } 309 }
421 const GLuint texture = image_view->Handle(ImageViewType::e3D); 310 const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
422 if (image_view->range.extent.layers > 1) { 311 if (image_view->range.extent.layers > 1) {
423 // TODO: OpenGL doesn't support rendering to a fixed number of slices 312 // TODO: OpenGL doesn't support rendering to a fixed number of slices
424 glNamedFramebufferTexture(fbo, attachment, texture, 0); 313 glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
439 } 328 }
440} 329}
441 330
331[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
332 switch (format) {
333 case Shader::ImageFormat::Typeless:
334 break;
335 case Shader::ImageFormat::R8_SINT:
336 return GL_R8I;
337 case Shader::ImageFormat::R8_UINT:
338 return GL_R8UI;
339 case Shader::ImageFormat::R16_UINT:
340 return GL_R16UI;
341 case Shader::ImageFormat::R16_SINT:
342 return GL_R16I;
343 case Shader::ImageFormat::R32_UINT:
344 return GL_R32UI;
345 case Shader::ImageFormat::R32G32_UINT:
346 return GL_RG32UI;
347 case Shader::ImageFormat::R32G32B32A32_UINT:
348 return GL_RGBA32UI;
349 }
350 UNREACHABLE_MSG("Invalid image format={}", format);
351 return GL_R32UI;
352}
442} // Anonymous namespace 353} // Anonymous namespace
443 354
444ImageBufferMap::~ImageBufferMap() { 355ImageBufferMap::~ImageBufferMap() {
@@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
453 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; 364 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
454 for (size_t i = 0; i < TARGETS.size(); ++i) { 365 for (size_t i = 0; i < TARGETS.size(); ++i) {
455 const GLenum target = TARGETS[i]; 366 const GLenum target = TARGETS[i];
456 for (const FormatTuple& tuple : FORMAT_TABLE) { 367 for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
457 const GLenum format = tuple.internal_format; 368 const GLenum format = tuple.internal_format;
458 GLint compat_class; 369 GLint compat_class;
459 GLint compat_type; 370 GLint compat_type;
@@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
475 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); 386 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
476 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); 387 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
477 null_image_3d.Create(GL_TEXTURE_3D); 388 null_image_3d.Create(GL_TEXTURE_3D);
478 null_image_rect.Create(GL_TEXTURE_RECTANGLE);
479 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); 389 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
480 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); 390 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
481 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); 391 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
482 glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
483 392
484 std::array<GLuint, 4> new_handles; 393 std::array<GLuint, 4> new_handles;
485 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); 394 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
496 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, 405 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
497 GL_R8, 0, 1, 0, 6); 406 GL_R8, 0, 1, 0, 6);
498 const std::array texture_handles{ 407 const std::array texture_handles{
499 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, 408 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
500 null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, 409 null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
501 null_image_view_2d_array.handle, null_image_view_cube.handle, 410 null_image_view_cube.handle,
502 }; 411 };
503 for (const GLuint handle : texture_handles) { 412 for (const GLuint handle : texture_handles) {
504 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; 413 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
505 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); 414 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
506 } 415 }
507 const auto set_view = [this](ImageViewType type, GLuint handle) { 416 const auto set_view = [this](Shader::TextureType type, GLuint handle) {
508 if (device.HasDebuggingToolAttached()) { 417 if (device.HasDebuggingToolAttached()) {
509 const std::string name = fmt::format("NullImage {}", type); 418 const std::string name = fmt::format("NullImage {}", type);
510 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); 419 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
511 } 420 }
512 null_image_views[static_cast<size_t>(type)] = handle; 421 null_image_views[static_cast<size_t>(type)] = handle;
513 }; 422 };
514 set_view(ImageViewType::e1D, null_image_view_1d.handle); 423 set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
515 set_view(ImageViewType::e2D, null_image_view_2d.handle); 424 set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
516 set_view(ImageViewType::Cube, null_image_view_cube.handle); 425 set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
517 set_view(ImageViewType::e3D, null_image_3d.handle); 426 set_view(Shader::TextureType::Color3D, null_image_3d.handle);
518 set_view(ImageViewType::e1DArray, null_image_1d_array.handle); 427 set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
519 set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); 428 set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
520 set_view(ImageViewType::CubeArray, null_image_cube_array.handle); 429 set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
521 set_view(ImageViewType::Rect, null_image_rect.handle);
522} 430}
523 431
524TextureCacheRuntime::~TextureCacheRuntime() = default; 432TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
710 gl_format = GL_RGBA; 618 gl_format = GL_RGBA;
711 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 619 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
712 } else { 620 } else {
713 const auto& tuple = GetFormatTuple(info.format); 621 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
714 gl_internal_format = tuple.internal_format; 622 gl_internal_format = tuple.internal_format;
715 gl_format = tuple.format; 623 gl_format = tuple.format;
716 gl_type = tuple.type; 624 gl_type = tuple.type;
@@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
750 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); 658 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
751 break; 659 break;
752 case GL_TEXTURE_BUFFER: 660 case GL_TEXTURE_BUFFER:
753 buffer.Create(); 661 UNREACHABLE();
754 glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
755 break; 662 break;
756 default: 663 default:
757 UNREACHABLE_MSG("Invalid target=0x{:x}", target); 664 UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
789 } 696 }
790} 697}
791 698
792void Image::UploadMemory(const ImageBufferMap& map,
793 std::span<const VideoCommon::BufferCopy> copies) {
794 for (const VideoCommon::BufferCopy& copy : copies) {
795 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
796 copy.dst_offset, copy.size);
797 }
798}
799
800void Image::DownloadMemory(ImageBufferMap& map, 699void Image::DownloadMemory(ImageBufferMap& map,
801 std::span<const VideoCommon::BufferImageCopy> copies) { 700 std::span<const VideoCommon::BufferImageCopy> copies) {
802 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 701 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
958 if (True(image.flags & ImageFlagBits::Converted)) { 857 if (True(image.flags & ImageFlagBits::Converted)) {
959 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 858 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
960 } else { 859 } else {
961 internal_format = GetFormatTuple(format).internal_format; 860 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
861 }
862 full_range = info.range;
863 flat_range = info.range;
864 set_object_label = device.HasDebuggingToolAttached();
865 is_render_target = info.IsRenderTarget();
866 original_texture = image.texture.handle;
867 num_samples = image.info.num_samples;
868 if (!is_render_target) {
869 swizzle[0] = info.x_source;
870 swizzle[1] = info.y_source;
871 swizzle[2] = info.z_source;
872 swizzle[3] = info.w_source;
962 } 873 }
963 VideoCommon::SubresourceRange flatten_range = info.range;
964 std::array<GLuint, 2> handles;
965 stored_views.reserve(2);
966
967 switch (info.type) { 874 switch (info.type) {
968 case ImageViewType::e1DArray: 875 case ImageViewType::e1DArray:
969 flatten_range.extent.layers = 1; 876 flat_range.extent.layers = 1;
970 [[fallthrough]]; 877 [[fallthrough]];
971 case ImageViewType::e1D: 878 case ImageViewType::e1D:
972 glGenTextures(2, handles.data()); 879 SetupView(Shader::TextureType::Color1D);
973 SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); 880 SetupView(Shader::TextureType::ColorArray1D);
974 SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
975 break; 881 break;
976 case ImageViewType::e2DArray: 882 case ImageViewType::e2DArray:
977 flatten_range.extent.layers = 1; 883 flat_range.extent.layers = 1;
978 [[fallthrough]]; 884 [[fallthrough]];
979 case ImageViewType::e2D: 885 case ImageViewType::e2D:
980 if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { 886 if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
984 .base = {.level = info.range.base.level, .layer = 0}, 890 .base = {.level = info.range.base.level, .layer = 0},
985 .extent = {.levels = 1, .layers = 1}, 891 .extent = {.levels = 1, .layers = 1},
986 }; 892 };
987 glGenTextures(1, handles.data()); 893 full_range = slice_range;
988 SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); 894
989 break; 895 SetupView(Shader::TextureType::Color3D);
896 } else {
897 SetupView(Shader::TextureType::Color2D);
898 SetupView(Shader::TextureType::ColorArray2D);
990 } 899 }
991 glGenTextures(2, handles.data());
992 SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
993 SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
994 break; 900 break;
995 case ImageViewType::e3D: 901 case ImageViewType::e3D:
996 glGenTextures(1, handles.data()); 902 SetupView(Shader::TextureType::Color3D);
997 SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
998 break; 903 break;
999 case ImageViewType::CubeArray: 904 case ImageViewType::CubeArray:
1000 flatten_range.extent.layers = 6; 905 flat_range.extent.layers = 6;
1001 [[fallthrough]]; 906 [[fallthrough]];
1002 case ImageViewType::Cube: 907 case ImageViewType::Cube:
1003 glGenTextures(2, handles.data()); 908 SetupView(Shader::TextureType::ColorCube);
1004 SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); 909 SetupView(Shader::TextureType::ColorArrayCube);
1005 SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
1006 break; 910 break;
1007 case ImageViewType::Rect: 911 case ImageViewType::Rect:
1008 glGenTextures(1, handles.data()); 912 UNIMPLEMENTED();
1009 SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
1010 break; 913 break;
1011 case ImageViewType::Buffer: 914 case ImageViewType::Buffer:
1012 glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); 915 UNREACHABLE();
1013 SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); 916 break;
917 }
918 switch (info.type) {
919 case ImageViewType::e1D:
920 default_handle = Handle(Shader::TextureType::Color1D);
921 break;
922 case ImageViewType::e1DArray:
923 default_handle = Handle(Shader::TextureType::ColorArray1D);
924 break;
925 case ImageViewType::e2D:
926 default_handle = Handle(Shader::TextureType::Color2D);
927 break;
928 case ImageViewType::e2DArray:
929 default_handle = Handle(Shader::TextureType::ColorArray2D);
930 break;
931 case ImageViewType::e3D:
932 default_handle = Handle(Shader::TextureType::Color3D);
933 break;
934 case ImageViewType::Cube:
935 default_handle = Handle(Shader::TextureType::ColorCube);
936 break;
937 case ImageViewType::CubeArray:
938 default_handle = Handle(Shader::TextureType::ColorArrayCube);
939 break;
940 default:
1014 break; 941 break;
1015 } 942 }
1016 default_handle = Handle(info.type);
1017} 943}
1018 944
945ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
946 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
947 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
948 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
949
950ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
951 const VideoCommon::ImageViewInfo& view_info)
952 : VideoCommon::ImageViewBase{info, view_info} {}
953
1019ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) 954ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
1020 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} 955 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
1021 956
1022void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, 957GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
1023 GLuint handle, const VideoCommon::ImageViewInfo& info, 958 if (image_format == Shader::ImageFormat::Typeless) {
1024 VideoCommon::SubresourceRange view_range) { 959 return Handle(texture_type);
1025 if (info.type == ImageViewType::Buffer) { 960 }
1026 // TODO: Take offset from buffer cache 961 const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
1027 glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, 962 image_format == Shader::ImageFormat::R16_SINT};
1028 image.guest_size_bytes); 963 if (!storage_views) {
1029 } else { 964 storage_views = std::make_unique<StorageViews>();
1030 const GLuint parent = image.texture.handle; 965 }
1031 const GLenum target = ImageTarget(view_type, image.info.num_samples); 966 auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
1032 glTextureView(handle, target, parent, internal_format, view_range.base.level, 967 GLuint& view{type_views[static_cast<size_t>(texture_type)]};
1033 view_range.extent.levels, view_range.base.layer, view_range.extent.layers); 968 if (view == 0) {
1034 if (!info.IsRenderTarget()) { 969 view = MakeView(texture_type, ShaderFormat(image_format));
1035 ApplySwizzle(handle, format, info.Swizzle()); 970 }
1036 } 971 return view;
972}
973
974void ImageView::SetupView(Shader::TextureType view_type) {
975 views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
976}
977
978GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
979 VideoCommon::SubresourceRange view_range;
980 switch (view_type) {
981 case Shader::TextureType::Color1D:
982 case Shader::TextureType::Color2D:
983 case Shader::TextureType::ColorCube:
984 view_range = flat_range;
985 break;
986 case Shader::TextureType::ColorArray1D:
987 case Shader::TextureType::ColorArray2D:
988 case Shader::TextureType::Color3D:
989 case Shader::TextureType::ColorArrayCube:
990 view_range = full_range;
991 break;
992 default:
993 UNREACHABLE();
1037 } 994 }
1038 if (device.HasDebuggingToolAttached()) { 995 OGLTextureView& view = stored_views.emplace_back();
1039 const std::string name = VideoCommon::Name(*this, view_type); 996 view.Create();
1040 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); 997
998 const GLenum target = ImageTarget(view_type, num_samples);
999 glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
1000 view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
1001 if (!is_render_target) {
1002 std::array<SwizzleSource, 4> casted_swizzle;
1003 std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
1004 return static_cast<SwizzleSource>(component_swizzle);
1005 });
1006 ApplySwizzle(view.handle, format, casted_swizzle);
1007 }
1008 if (set_object_label) {
1009 const std::string name = VideoCommon::Name(*this);
1010 glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
1041 } 1011 }
1042 stored_views.emplace_back().handle = handle; 1012 return view.handle;
1043 views[static_cast<size_t>(view_type)] = handle;
1044} 1013}
1045 1014
1046Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { 1015Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index cf3b789e3..921072ebe 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
9 9
10#include <glad/glad.h> 10#include <glad/glad.h>
11 11
12#include "shader_recompiler/shader_info.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/util_shaders.h" 14#include "video_core/renderer_opengl/util_shaders.h"
14#include "video_core/texture_cache/texture_cache.h" 15#include "video_core/texture_cache/texture_cache.h"
@@ -127,13 +128,12 @@ private:
127 OGLTexture null_image_1d_array; 128 OGLTexture null_image_1d_array;
128 OGLTexture null_image_cube_array; 129 OGLTexture null_image_cube_array;
129 OGLTexture null_image_3d; 130 OGLTexture null_image_3d;
130 OGLTexture null_image_rect;
131 OGLTextureView null_image_view_1d; 131 OGLTextureView null_image_view_1d;
132 OGLTextureView null_image_view_2d; 132 OGLTextureView null_image_view_2d;
133 OGLTextureView null_image_view_2d_array; 133 OGLTextureView null_image_view_2d_array;
134 OGLTextureView null_image_view_cube; 134 OGLTextureView null_image_view_cube;
135 135
136 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; 136 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
137}; 137};
138 138
139class Image : public VideoCommon::ImageBase { 139class Image : public VideoCommon::ImageBase {
@@ -154,8 +154,6 @@ public:
154 void UploadMemory(const ImageBufferMap& map, 154 void UploadMemory(const ImageBufferMap& map,
155 std::span<const VideoCommon::BufferImageCopy> copies); 155 std::span<const VideoCommon::BufferImageCopy> copies);
156 156
157 void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
158
159 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); 157 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
160 158
161 GLuint StorageHandle() noexcept; 159 GLuint StorageHandle() noexcept;
@@ -170,7 +168,6 @@ private:
170 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); 168 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
171 169
172 OGLTexture texture; 170 OGLTexture texture;
173 OGLBuffer buffer;
174 OGLTextureView store_view; 171 OGLTextureView store_view;
175 GLenum gl_internal_format = GL_NONE; 172 GLenum gl_internal_format = GL_NONE;
176 GLenum gl_format = GL_NONE; 173 GLenum gl_format = GL_NONE;
@@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {
182 179
183public: 180public:
184 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); 181 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
182 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
183 const VideoCommon::ImageViewInfo&, GPUVAddr);
184 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
185 const VideoCommon::ImageViewInfo& view_info);
185 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); 186 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
186 187
187 [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { 188 [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
188 return views[static_cast<size_t>(query_type)]; 189 Shader::ImageFormat image_format);
190
191 [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
192 return views[static_cast<size_t>(handle_type)];
189 } 193 }
190 194
191 [[nodiscard]] GLuint DefaultHandle() const noexcept { 195 [[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -196,15 +200,38 @@ public:
196 return internal_format; 200 return internal_format;
197 } 201 }
198 202
203 [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
204 return gpu_addr;
205 }
206
207 [[nodiscard]] u32 BufferSize() const noexcept {
208 return buffer_size;
209 }
210
199private: 211private:
200 void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, 212 struct StorageViews {
201 const VideoCommon::ImageViewInfo& info, 213 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
202 VideoCommon::SubresourceRange view_range); 214 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
215 };
216
217 void SetupView(Shader::TextureType view_type);
218
219 GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
203 220
204 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; 221 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
205 std::vector<OGLTextureView> stored_views; 222 std::vector<OGLTextureView> stored_views;
206 GLuint default_handle = 0; 223 std::unique_ptr<StorageViews> storage_views;
207 GLenum internal_format = GL_NONE; 224 GLenum internal_format = GL_NONE;
225 GLuint default_handle = 0;
226 GPUVAddr gpu_addr = 0;
227 u32 buffer_size = 0;
228 GLuint original_texture = 0;
229 int num_samples = 0;
230 VideoCommon::SubresourceRange flat_range;
231 VideoCommon::SubresourceRange full_range;
232 std::array<u8, 4> swizzle{};
233 bool set_object_label = false;
234 bool is_render_target = false;
208}; 235};
209 236
210class ImageAlloc : public VideoCommon::ImageAllocBase {}; 237class ImageAlloc : public VideoCommon::ImageAllocBase {};
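For reference, a short usage sketch of the reworked ImageView interface above: sampled accesses go through Handle()/DefaultHandle() keyed by Shader::TextureType, while storage image accesses ask StorageView() for a lazily created, format-cast view (signed and unsigned integer views are cached separately). The binding calls and local names below are illustrative assumptions, not code from this change:

    // Sampled texture: pick the view matching the shader's declared texture type.
    const GLuint sampled_view = image_view.Handle(Shader::TextureType::ColorArray2D);
    glBindTextureUnit(binding, sampled_view);
    // Storage image: reinterpret the view to the shader's declared image format.
    const GLuint storage_view =
        image_view.StorageView(Shader::TextureType::Color2D, Shader::ImageFormat::R32_UINT);
    glBindImageTexture(binding, storage_view, 0, GL_TRUE, 0, GL_READ_WRITE, GL_R32UI);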
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
5#pragma once 5#pragma once
6 6
7#include <glad/glad.h> 7#include <glad/glad.h>
8
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/surface.h"
9 11
10namespace OpenGL::MaxwellToGL { 12namespace OpenGL::MaxwellToGL {
11 13
12using Maxwell = Tegra::Engines::Maxwell3D::Regs; 14using Maxwell = Tegra::Engines::Maxwell3D::Regs;
13 15
16struct FormatTuple {
17 GLenum internal_format;
18 GLenum format = GL_NONE;
19 GLenum type = GL_NONE;
20};
21
22constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
23 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
24 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
25 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
26 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
27 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
28 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
29 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
30 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
31 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
32 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
33 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
34 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
35 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
36 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
37 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
38 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
39 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
40 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
41 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
42 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
43 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
44 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
45 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
46 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
47 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
48 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
49 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
50 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
51 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
52 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
53 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
54 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
55 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
56 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
57 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
58 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
59 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
60 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
61 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
62 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
63 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
64 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
65 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
66 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
67 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
68 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
69 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
70 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
71 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
72 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
73 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
74 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
75 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
76 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
77 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
78 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
79 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
80 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
81 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
82 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
83 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
84 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
85 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
86 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
87 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
88 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
89 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
90 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
91 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
92 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
93 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
94 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
95 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
96 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
97 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
98 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
99 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
100 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
102 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
103 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
104 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
105 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
106 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
107 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
108 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
109 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
110 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
111 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
112 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
113 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
114 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
115}};
116
117inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
118 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
119 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
120}
121
14inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { 122inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
15 switch (attrib.type) { 123 switch (attrib.type) {
16 case Maxwell::VertexAttribute::Type::UnsignedNorm: 124 case Maxwell::VertexAttribute::Type::UnsignedNorm:
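The FORMAT_TABLE/GetFormatTuple pair added above is consumed by looking up the tuple for a pixel format and passing its fields straight to GL, mirroring the gl_texture_cache.cpp hunks earlier in this diff. A minimal sketch of that pattern (local variable names are illustrative):

    using VideoCore::Surface::PixelFormat;
    const auto& tuple = OpenGL::MaxwellToGL::GetFormatTuple(PixelFormat::A8B8G8R8_UNORM);
    // Allocate storage with the internal format...
    glTextureStorage2D(texture.handle, num_levels, tuple.internal_format, width, height);
    // ...and upload linear data with the matching format/type pair.
    glTextureSubImage2D(texture.handle, 0, 0, 0, width, height, tuple.format, tuple.type, pixels);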
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c12929de6..285e78384 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -25,6 +25,7 @@
25#include "video_core/host_shaders/opengl_present_vert.h" 25#include "video_core/host_shaders/opengl_present_vert.h"
26#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
27#include "video_core/renderer_opengl/gl_shader_manager.h" 27#include "video_core/renderer_opengl/gl_shader_manager.h"
28#include "video_core/renderer_opengl/gl_shader_util.h"
28#include "video_core/renderer_opengl/renderer_opengl.h" 29#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/textures/decoders.h" 30#include "video_core/textures/decoders.h"
30 31
@@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
139 } 140 }
140 AddTelemetryFields(); 141 AddTelemetryFields();
141 InitOpenGLObjects(); 142 InitOpenGLObjects();
143
144 // Initialize default attributes to match hardware's disabled attributes
145 GLint max_attribs{};
146 glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
147 for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
148 glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
149 }
150 // Enable seamless cubemaps when per texture parameters are not available
151 if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
152 glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
153 }
154 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
155 if (device.HasVertexBufferUnifiedMemory()) {
156 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
157 glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
158
159 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
160 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
161 &vertex_buffer_address);
162 }
142} 163}
143 164
144RendererOpenGL::~RendererOpenGL() = default; 165RendererOpenGL::~RendererOpenGL() = default;
@@ -230,18 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
230 251
231void RendererOpenGL::InitOpenGLObjects() { 252void RendererOpenGL::InitOpenGLObjects() {
232 // Create shader programs 253 // Create shader programs
233 OGLShader vertex_shader; 254 present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
234 vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); 255 present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
235
236 OGLShader fragment_shader;
237 fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
238
239 vertex_program.Create(true, false, vertex_shader.handle);
240 fragment_program.Create(true, false, fragment_shader.handle);
241
242 pipeline.Create();
243 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
244 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
245 256
246 // Generate presentation sampler 257 // Generate presentation sampler
247 present_sampler.Create(); 258 present_sampler.Create();
@@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {
263 274
264 // Clear screen to black 275 // Clear screen to black
265 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 276 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
266
267 // Enable seamless cubemaps when per texture parameters are not available
268 if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
269 glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
270 }
271
272 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
273 if (device.HasVertexBufferUnifiedMemory()) {
274 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
275 glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
276
277 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
278 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
279 &vertex_buffer_address);
280 }
281} 277}
282 278
283void RendererOpenGL::AddTelemetryFields() { 279void RendererOpenGL::AddTelemetryFields() {
@@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
342 // Set projection matrix 338 // Set projection matrix
343 const std::array ortho_matrix = 339 const std::array ortho_matrix =
344 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); 340 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
345 glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, 341 program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
346 std::data(ortho_matrix)); 342 glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
343 ortho_matrix.data());
347 344
348 const auto& texcoords = screen_info.display_texcoords; 345 const auto& texcoords = screen_info.display_texcoords;
349 auto left = texcoords.left; 346 auto left = texcoords.left;
@@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
404 state_tracker.NotifyClipControl(); 401 state_tracker.NotifyClipControl();
405 state_tracker.NotifyAlphaTest(); 402 state_tracker.NotifyAlphaTest();
406 403
407 program_manager.BindHostPipeline(pipeline.handle);
408
409 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); 404 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
410 glEnable(GL_CULL_FACE); 405 glEnable(GL_CULL_FACE);
411 if (screen_info.display_srgb) { 406 if (screen_info.display_srgb) {
@@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
453 glClear(GL_COLOR_BUFFER_BIT); 448 glClear(GL_COLOR_BUFFER_BIT);
454 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 449 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
455 450
456 program_manager.RestoreGuestPipeline(); 451 // TODO
452 // program_manager.RestoreGuestPipeline();
457} 453}
458 454
459void RendererOpenGL::RenderScreenshot() { 455void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0b66f8332..d455f572f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
12#include "video_core/renderer_opengl/gl_device.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_shader_manager.h"
16#include "video_core/renderer_opengl/gl_state_tracker.h" 15#include "video_core/renderer_opengl/gl_state_tracker.h"
17 16
18namespace Core { 17namespace Core {
@@ -111,9 +110,8 @@ private:
111 // OpenGL object IDs 110 // OpenGL object IDs
112 OGLSampler present_sampler; 111 OGLSampler present_sampler;
113 OGLBuffer vertex_buffer; 112 OGLBuffer vertex_buffer;
114 OGLProgram vertex_program; 113 OGLProgram present_vertex;
115 OGLProgram fragment_program; 114 OGLProgram present_fragment;
116 OGLPipeline pipeline;
117 OGLFramebuffer screenshot_framebuffer; 115 OGLFramebuffer screenshot_framebuffer;
118 116
119 // GPU address of the vertex buffer 117 // GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 8fb5be393..37a4d1d9d 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,8 +16,8 @@
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h" 16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/opengl_copy_bgra_comp.h" 17#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
18#include "video_core/host_shaders/pitch_unswizzle_comp.h" 18#include "video_core/host_shaders/pitch_unswizzle_comp.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_shader_manager.h" 19#include "video_core/renderer_opengl/gl_shader_manager.h"
20#include "video_core/renderer_opengl/gl_shader_util.h"
21#include "video_core/renderer_opengl/gl_texture_cache.h" 21#include "video_core/renderer_opengl/gl_texture_cache.h"
22#include "video_core/renderer_opengl/util_shaders.h" 22#include "video_core/renderer_opengl/util_shaders.h"
23#include "video_core/texture_cache/accelerated_swizzle.h" 23#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
41using VideoCore::Surface::BytesPerBlock; 41using VideoCore::Surface::BytesPerBlock;
42 42
43namespace { 43namespace {
44
45OGLProgram MakeProgram(std::string_view source) { 44OGLProgram MakeProgram(std::string_view source) {
46 OGLShader shader; 45 return CreateProgram(source, GL_COMPUTE_SHADER);
47 shader.Create(source, GL_COMPUTE_SHADER);
48
49 OGLProgram program;
50 program.Create(true, false, shader.handle);
51 return program;
52} 46}
53 47
54size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { 48size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
55 return static_cast<size_t>(copy.extent.width * copy.extent.height * 49 return static_cast<size_t>(copy.extent.width * copy.extent.height *
56 copy.src_subresource.num_layers); 50 copy.src_subresource.num_layers);
57} 51}
58
59} // Anonymous namespace 52} // Anonymous namespace
60 53
61UtilShaders::UtilShaders(ProgramManager& program_manager_) 54UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
86 .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), 79 .width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
87 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), 80 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
88 }; 81 };
89 program_manager.BindHostCompute(astc_decoder_program.handle); 82 program_manager.BindComputeProgram(astc_decoder_program.handle);
90 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 83 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
91 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); 84 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
92 85
@@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
134 static constexpr GLuint BINDING_INPUT_BUFFER = 1; 127 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
135 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 128 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
136 129
137 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); 130 program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
138 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 131 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
139 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 132 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
140 133
@@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
173 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 166 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
174 167
175 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 168 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
176 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); 169 program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
177 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 170 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
178 171
179 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 172 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
222 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), 215 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
223 "Non-power of two images are not implemented"); 216 "Non-power of two images are not implemented");
224 217
225 program_manager.BindHostCompute(pitch_unswizzle_program.handle); 218 program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
226 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 219 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
227 glUniform2ui(LOC_ORIGIN, 0, 0); 220 glUniform2ui(LOC_ORIGIN, 0, 0);
228 glUniform2i(LOC_DESTINATION, 0, 0); 221 glUniform2i(LOC_DESTINATION, 0, 0);
@@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
250 static constexpr GLuint LOC_SRC_OFFSET = 0; 243 static constexpr GLuint LOC_SRC_OFFSET = 0;
251 static constexpr GLuint LOC_DST_OFFSET = 1; 244 static constexpr GLuint LOC_DST_OFFSET = 1;
252 245
253 program_manager.BindHostCompute(copy_bc4_program.handle); 246 program_manager.BindComputeProgram(copy_bc4_program.handle);
254 247
255 for (const ImageCopy& copy : copies) { 248 for (const ImageCopy& copy : copies) {
256 ASSERT(copy.src_subresource.base_layer == 0); 249 ASSERT(copy.src_subresource.base_layer == 0);
@@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
286 break; 279 break;
287 case 4: { 280 case 4: {
288 // BGRA8 copy 281 // BGRA8 copy
289 program_manager.BindHostCompute(copy_bgra_program.handle); 282 program_manager.BindComputeProgram(copy_bgra_program.handle);
290 constexpr GLenum FORMAT = GL_RGBA8; 283 constexpr GLenum FORMAT = GL_RGBA8;
291 for (const ImageCopy& copy : copies) { 284 for (const ImageCopy& copy : copies) {
292 ASSERT(copy.src_offset == zero_offset); 285 ASSERT(copy.src_offset == zero_offset);
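The util_shaders.cpp changes above reduce every helper to CreateProgram(source, GL_COMPUTE_SHADER) plus ProgramManager::BindComputeProgram. A rough sketch of the resulting compute path, assuming a placeholder compute_source and dispatch size rather than values taken from this diff:

    OGLProgram program = CreateProgram(compute_source, GL_COMPUTE_SHADER);
    program_manager.BindComputeProgram(program.handle);
    glDispatchCompute(num_groups_x, num_groups_y, 1);
    glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);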
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c1b2f063 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA
49 .bindingCount = 1, 49 .bindingCount = 1,
50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, 50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
51}; 51};
52template <u32 num_textures>
53inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
54 .uniform_buffers = 0,
55 .storage_buffers = 0,
56 .texture_buffers = 0,
57 .image_buffers = 0,
58 .textures = num_textures,
59 .images = 0,
60 .score = 2,
61};
52constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ 62constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 63 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
54 .pNext = nullptr, 64 .pNext = nullptr,
@@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
323 cmdbuf.SetScissor(0, scissor); 333 cmdbuf.SetScissor(0, scissor);
324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); 334 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
325} 335}
326
327} // Anonymous namespace 336} // Anonymous namespace
328 337
329BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, 338BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
330 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) 339 StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
331 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, 340 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
332 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( 341 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
333 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), 342 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
334 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( 343 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
335 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), 344 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
336 one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), 345 one_texture_descriptor_allocator{
337 two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), 346 descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
347 two_textures_descriptor_allocator{
348 descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
338 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( 349 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
339 PipelineLayoutCreateInfo(one_texture_set_layout.address()))), 350 PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
340 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( 351 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
@@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
362 .operation = operation, 373 .operation = operation,
363 }; 374 };
364 const VkPipelineLayout layout = *one_texture_pipeline_layout; 375 const VkPipelineLayout layout = *one_texture_pipeline_layout;
365 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); 376 const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
366 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; 377 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
367 const VkPipeline pipeline = FindOrEmplacePipeline(key); 378 const VkPipeline pipeline = FindOrEmplacePipeline(key);
368 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
369 scheduler.RequestRenderpass(dst_framebuffer); 379 scheduler.RequestRenderpass(dst_framebuffer);
370 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, 380 scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
371 &device = device](vk::CommandBuffer cmdbuf) { 381 src_view](vk::CommandBuffer cmdbuf) {
372 // TODO: Barriers 382 // TODO: Barriers
383 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
373 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); 384 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
374 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 385 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
375 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, 386 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
@@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
391 const VkPipelineLayout layout = *two_textures_pipeline_layout; 402 const VkPipelineLayout layout = *two_textures_pipeline_layout;
392 const VkSampler sampler = *nearest_sampler; 403 const VkSampler sampler = *nearest_sampler;
393 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); 404 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
394 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
395 scheduler.RequestRenderpass(dst_framebuffer); 405 scheduler.RequestRenderpass(dst_framebuffer);
396 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, 406 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
397 src_stencil_view, descriptor_set, 407 src_stencil_view, this](vk::CommandBuffer cmdbuf) {
398 &device = device](vk::CommandBuffer cmdbuf) {
399 // TODO: Barriers 408 // TODO: Barriers
409 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
400 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, 410 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
401 src_stencil_view); 411 src_stencil_view);
402 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 412 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
416 426
417void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, 427void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
418 const ImageView& src_image_view) { 428 const ImageView& src_image_view) {
419
420 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); 429 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
421 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); 430 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
422} 431}
@@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
436void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, 445void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
437 const ImageView& src_image_view) { 446 const ImageView& src_image_view) {
438 const VkPipelineLayout layout = *one_texture_pipeline_layout; 447 const VkPipelineLayout layout = *one_texture_pipeline_layout;
439 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); 448 const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
440 const VkSampler sampler = *nearest_sampler; 449 const VkSampler sampler = *nearest_sampler;
441 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
442 const VkExtent2D extent{ 450 const VkExtent2D extent{
443 .width = src_image_view.size.width, 451 .width = src_image_view.size.width,
444 .height = src_image_view.size.height, 452 .height = src_image_view.size.height,
445 }; 453 };
446 scheduler.RequestRenderpass(dst_framebuffer); 454 scheduler.RequestRenderpass(dst_framebuffer);
447 scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, 455 scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
448 &device = device](vk::CommandBuffer cmdbuf) {
449 const VkOffset2D offset{ 456 const VkOffset2D offset{
450 .x = 0, 457 .x = 0,
451 .y = 0, 458 .y = 0,
@@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
466 .tex_scale = {viewport.width, viewport.height}, 473 .tex_scale = {viewport.width, viewport.height},
467 .tex_offset = {0.0f, 0.0f}, 474 .tex_offset = {0.0f, 0.0f},
468 }; 475 };
476 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
469 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); 477 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
470 478
471 // TODO: Barriers 479 // TODO: Barriers
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 0d81a06ed..33ee095c1 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -31,7 +31,7 @@ struct BlitImagePipelineKey {
31class BlitImageHelper { 31class BlitImageHelper {
32public: 32public:
33 explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, 33 explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
34 StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); 34 StateTracker& state_tracker, DescriptorPool& descriptor_pool);
35 ~BlitImageHelper(); 35 ~BlitImageHelper();
36 36
37 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, 37 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d70153df3 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -15,9 +15,7 @@
15#include "video_core/renderer_vulkan/vk_state_tracker.h" 15#include "video_core/renderer_vulkan/vk_state_tracker.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18
19namespace { 18namespace {
20
21constexpr size_t POINT = 0; 19constexpr size_t POINT = 0;
22constexpr size_t LINE = 1; 20constexpr size_t LINE = 1;
23constexpr size_t POLYGON = 2; 21constexpr size_t POLYGON = 2;
@@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
39 POLYGON, // Patches 37 POLYGON, // Patches
40}; 38};
41 39
40void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
41 std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
42 return VideoCommon::TransformFeedbackState::Layout{
43 .stream = layout.stream,
44 .varying_count = layout.varying_count,
45 .stride = layout.stride,
46 };
47 });
48 state.varyings = regs.tfb_varying_locs;
49}
42} // Anonymous namespace 50} // Anonymous namespace
43 51
44void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, 52void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
45 bool has_extended_dynamic_state) { 53 bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
46 const Maxwell& regs = maxwell3d.regs; 54 const Maxwell& regs = maxwell3d.regs;
47 const std::array enabled_lut{ 55 const std::array enabled_lut{
48 regs.polygon_offset_point_enable, 56 regs.polygon_offset_point_enable,
@@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
52 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 60 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
53 61
54 raw1 = 0; 62 raw1 = 0;
63 extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
64 dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
65 xfb_enabled.Assign(regs.tfb_enabled != 0);
55 primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); 66 primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
56 depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); 67 depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
57 depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); 68 depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
63 tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); 74 tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
64 logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); 75 logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
65 logic_op.Assign(PackLogicOp(regs.logic_op.operation)); 76 logic_op.Assign(PackLogicOp(regs.logic_op.operation));
66 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
67 topology.Assign(regs.draw.topology); 77 topology.Assign(regs.draw.topology);
68 msaa_mode.Assign(regs.multisample_mode); 78 msaa_mode.Assign(regs.multisample_mode);
69 79
70 raw2 = 0; 80 raw2 = 0;
81 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
71 const auto test_func = 82 const auto test_func =
72 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; 83 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
73 alpha_test_func.Assign(PackComparisonOp(test_func)); 84 alpha_test_func.Assign(PackComparisonOp(test_func));
74 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); 85 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
75 86 depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
87 depth_format.Assign(static_cast<u32>(regs.zeta.format));
88 y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0);
89 provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0);
90 conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0);
91 smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0);
92
93 for (size_t i = 0; i < regs.rt.size(); ++i) {
94 color_formats[i] = static_cast<u8>(regs.rt[i].format);
95 }
76 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); 96 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
77 point_size = Common::BitCast<u32>(regs.point_size); 97 point_size = Common::BitCast<u32>(regs.point_size);
78 98
79 if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { 99 if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
80 maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; 100 if (has_dynamic_vertex_input) {
81 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 101 // Dirty flag will be reset by the command buffer update
82 const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); 102 static constexpr std::array LUT{
83 binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; 103 0u, // Invalid
84 } 104 1u, // SignedNorm
85 } 105 1u, // UnsignedNorm
86 if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { 106 2u, // SignedInt
87 maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; 107 3u, // UnsignedInt
88 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 108 1u, // UnsignedScaled
89 const auto& input = regs.vertex_attrib_format[index]; 109 1u, // SignedScaled
90 auto& attribute = attributes[index]; 110 1u, // Float
91 attribute.raw = 0; 111 };
92 attribute.enabled.Assign(input.IsConstant() ? 0 : 1); 112 const auto& attrs = regs.vertex_attrib_format;
93 attribute.buffer.Assign(input.buffer); 113 attribute_types = 0;
94 attribute.offset.Assign(input.offset); 114 for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
95 attribute.type.Assign(static_cast<u32>(input.type.Value())); 115 const u32 mask = attrs[i].constant != 0 ? 0 : 3;
96 attribute.size.Assign(static_cast<u32>(input.size.Value())); 116 const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
117 attribute_types |= static_cast<u64>(type & mask) << (i * 2);
118 }
119 } else {
120 maxwell3d.dirty.flags[Dirty::VertexInput] = false;
121 enabled_divisors = 0;
122 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
123 const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
124 binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
125 enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
126 }
127 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
128 const auto& input = regs.vertex_attrib_format[index];
129 auto& attribute = attributes[index];
130 attribute.raw = 0;
131 attribute.enabled.Assign(input.constant ? 0 : 1);
132 attribute.buffer.Assign(input.buffer);
133 attribute.offset.Assign(input.offset);
134 attribute.type.Assign(static_cast<u32>(input.type.Value()));
135 attribute.size.Assign(static_cast<u32>(input.size.Value()));
136 }
97 } 137 }
98 } 138 }
99 if (maxwell3d.dirty.flags[Dirty::Blending]) { 139 if (maxwell3d.dirty.flags[Dirty::Blending]) {
@@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
109 return static_cast<u16>(viewport.swizzle.raw); 149 return static_cast<u16>(viewport.swizzle.raw);
110 }); 150 });
111 } 151 }
112 if (!has_extended_dynamic_state) { 152 if (!extended_dynamic_state) {
113 no_extended_dynamic_state.Assign(1);
114 dynamic_state.Refresh(regs); 153 dynamic_state.Refresh(regs);
115 } 154 }
155 if (xfb_enabled) {
156 RefreshXfbState(xfb_state, regs);
157 }
116} 158}
117 159
118void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { 160void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
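Note on the dynamic-vertex-input path added above: when VK_EXT_vertex_input_dynamic_state is available, the full attribute descriptions are no longer baked into the pipeline key; only a 2-bit type code per attribute is kept, packed into the 64-bit attribute_types field (0 = invalid, 1 = float/normalized/scaled, 2 = signed integer, 3 = unsigned integer, following the LUT in the hunk). A minimal, self-contained sketch of that packing and of the matching decode, written independently of the Maxwell register layout:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Illustrative only: pack one 2-bit type code per vertex attribute into a
    // u64 (up to 32 attributes), mirroring the attribute_types encoding above.
    constexpr std::uint64_t PackAttributeTypes(const std::array<std::uint32_t, 32>& codes) {
        std::uint64_t packed = 0;
        for (std::size_t i = 0; i < codes.size(); ++i) {
            packed |= static_cast<std::uint64_t>(codes[i] & 0b11) << (i * 2);
        }
        return packed;
    }

    // Mirrors the DynamicAttributeType() accessor introduced in the header.
    constexpr std::uint32_t UnpackAttributeType(std::uint64_t packed, std::size_t index) {
        return static_cast<std::uint32_t>((packed >> (index * 2)) & 0b11);
    }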
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..c9be37935 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -12,6 +12,7 @@
12 12
13#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/surface.h" 14#include "video_core/surface.h"
15#include "video_core/transform_feedback.h"
15 16
16namespace Vulkan { 17namespace Vulkan {
17 18
@@ -60,7 +61,7 @@ struct FixedPipelineState {
60 61
61 void Refresh(const Maxwell& regs, size_t index); 62 void Refresh(const Maxwell& regs, size_t index);
62 63
63 constexpr std::array<bool, 4> Mask() const noexcept { 64 std::array<bool, 4> Mask() const noexcept {
64 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; 65 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
65 } 66 }
66 67
@@ -97,11 +98,11 @@ struct FixedPipelineState {
97 BitField<20, 3, u32> type; 98 BitField<20, 3, u32> type;
98 BitField<23, 6, u32> size; 99 BitField<23, 6, u32> size;
99 100
100 constexpr Maxwell::VertexAttribute::Type Type() const noexcept { 101 Maxwell::VertexAttribute::Type Type() const noexcept {
101 return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); 102 return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
102 } 103 }
103 104
104 constexpr Maxwell::VertexAttribute::Size Size() const noexcept { 105 Maxwell::VertexAttribute::Size Size() const noexcept {
105 return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); 106 return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
106 } 107 }
107 }; 108 };
@@ -167,37 +168,53 @@ struct FixedPipelineState {
167 168
168 union { 169 union {
169 u32 raw1; 170 u32 raw1;
170 BitField<0, 1, u32> no_extended_dynamic_state; 171 BitField<0, 1, u32> extended_dynamic_state;
171 BitField<2, 1, u32> primitive_restart_enable; 172 BitField<1, 1, u32> dynamic_vertex_input;
172 BitField<3, 1, u32> depth_bias_enable; 173 BitField<2, 1, u32> xfb_enabled;
173 BitField<4, 1, u32> depth_clamp_disabled; 174 BitField<3, 1, u32> primitive_restart_enable;
174 BitField<5, 1, u32> ndc_minus_one_to_one; 175 BitField<4, 1, u32> depth_bias_enable;
175 BitField<6, 2, u32> polygon_mode; 176 BitField<5, 1, u32> depth_clamp_disabled;
176 BitField<8, 5, u32> patch_control_points_minus_one; 177 BitField<6, 1, u32> ndc_minus_one_to_one;
177 BitField<13, 2, u32> tessellation_primitive; 178 BitField<7, 2, u32> polygon_mode;
178 BitField<15, 2, u32> tessellation_spacing; 179 BitField<9, 5, u32> patch_control_points_minus_one;
179 BitField<17, 1, u32> tessellation_clockwise; 180 BitField<14, 2, u32> tessellation_primitive;
180 BitField<18, 1, u32> logic_op_enable; 181 BitField<16, 2, u32> tessellation_spacing;
181 BitField<19, 4, u32> logic_op; 182 BitField<18, 1, u32> tessellation_clockwise;
182 BitField<23, 1, u32> rasterize_enable; 183 BitField<19, 1, u32> logic_op_enable;
184 BitField<20, 4, u32> logic_op;
183 BitField<24, 4, Maxwell::PrimitiveTopology> topology; 185 BitField<24, 4, Maxwell::PrimitiveTopology> topology;
184 BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; 186 BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
185 }; 187 };
186 union { 188 union {
187 u32 raw2; 189 u32 raw2;
188 BitField<0, 3, u32> alpha_test_func; 190 BitField<0, 1, u32> rasterize_enable;
189 BitField<3, 1, u32> early_z; 191 BitField<1, 3, u32> alpha_test_func;
192 BitField<4, 1, u32> early_z;
193 BitField<5, 1, u32> depth_enabled;
194 BitField<6, 5, u32> depth_format;
195 BitField<11, 1, u32> y_negate;
196 BitField<12, 1, u32> provoking_vertex_last;
197 BitField<13, 1, u32> conservative_raster_enable;
198 BitField<14, 1, u32> smooth_lines;
190 }; 199 };
200 std::array<u8, Maxwell::NumRenderTargets> color_formats;
191 201
192 u32 alpha_test_ref; 202 u32 alpha_test_ref;
193 u32 point_size; 203 u32 point_size;
194 std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
195 std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
196 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; 204 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
197 std::array<u16, Maxwell::NumViewports> viewport_swizzles; 205 std::array<u16, Maxwell::NumViewports> viewport_swizzles;
206 union {
207 u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
208 u64 enabled_divisors;
209 };
210 std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
211 std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
212
198 DynamicState dynamic_state; 213 DynamicState dynamic_state;
214 VideoCommon::TransformFeedbackState xfb_state;
199 215
200 void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); 216 void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
217 bool has_dynamic_vertex_input);
201 218
202 size_t Hash() const noexcept; 219 size_t Hash() const noexcept;
203 220
@@ -208,8 +225,24 @@ struct FixedPipelineState {
208 } 225 }
209 226
210 size_t Size() const noexcept { 227 size_t Size() const noexcept {
211 const size_t total_size = sizeof *this; 228 if (xfb_enabled) {
212 return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); 229 // When transform feedback is enabled, use the whole struct
230 return sizeof(*this);
231 }
232 if (dynamic_vertex_input) {
233 // Exclude dynamic state and attributes
234 return offsetof(FixedPipelineState, attributes);
235 }
236 if (extended_dynamic_state) {
237 // Exclude dynamic state
238 return offsetof(FixedPipelineState, dynamic_state);
239 }
240 // Default
241 return offsetof(FixedPipelineState, xfb_state);
242 }
243
244 u32 DynamicAttributeType(size_t index) const noexcept {
245 return (attribute_types >> (index * 2)) & 0b11;
213 } 246 }
214}; 247};
215static_assert(std::has_unique_object_representations_v<FixedPipelineState>); 248static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
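The Size() rework is what keeps this struct usable as a cache key: optional state (attributes, binding divisors, DynamicState, the transform feedback block) lives at the tail of the struct, and Size() returns an offsetof(...) cut-off so only the bytes relevant to the active feature tier are hashed and compared. The static_assert on unique object representations is what makes byte-wise hashing sound, since it rules out padding bytes in the hashed region. A hedged sketch of a size-trimmed byte hash; the FNV-1a helper below is illustrative and not the hash function the codebase actually uses:

    #include <cstddef>
    #include <cstdint>

    // Hash only the first `size` bytes of a trivially-copyable key. Hash() and
    // operator== can then ignore everything past the Size() cut-off.
    inline std::size_t HashBytes(const void* data, std::size_t size) noexcept {
        const auto* bytes = static_cast<const unsigned char*>(data);
        std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
        for (std::size_t i = 0; i < size; ++i) {
            hash ^= bytes[i];
            hash *= 0x100000001b3ULL; // FNV-1a prime
        }
        return static_cast<std::size_t>(hash);
    }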
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..68a23b602 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -157,7 +157,7 @@ struct FormatTuple {
157 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT 157 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
158 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT 158 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
159 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM 159 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
160 {VK_FORMAT_UNDEFINED}, // R16_SNORM 160 {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM
161 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT 161 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
162 {VK_FORMAT_UNDEFINED}, // R16_SINT 162 {VK_FORMAT_UNDEFINED}, // R16_SINT
163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM 163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
@@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
266 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; 266 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
267} 267}
268 268
269VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { 269VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
270 switch (stage) { 270 switch (stage) {
271 case Tegra::Engines::ShaderType::Vertex: 271 case Shader::Stage::VertexA:
272 case Shader::Stage::VertexB:
272 return VK_SHADER_STAGE_VERTEX_BIT; 273 return VK_SHADER_STAGE_VERTEX_BIT;
273 case Tegra::Engines::ShaderType::TesselationControl: 274 case Shader::Stage::TessellationControl:
274 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; 275 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
275 case Tegra::Engines::ShaderType::TesselationEval: 276 case Shader::Stage::TessellationEval:
276 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; 277 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
277 case Tegra::Engines::ShaderType::Geometry: 278 case Shader::Stage::Geometry:
278 return VK_SHADER_STAGE_GEOMETRY_BIT; 279 return VK_SHADER_STAGE_GEOMETRY_BIT;
279 case Tegra::Engines::ShaderType::Fragment: 280 case Shader::Stage::Fragment:
280 return VK_SHADER_STAGE_FRAGMENT_BIT; 281 return VK_SHADER_STAGE_FRAGMENT_BIT;
281 case Tegra::Engines::ShaderType::Compute: 282 case Shader::Stage::Compute:
282 return VK_SHADER_STAGE_COMPUTE_BIT; 283 return VK_SHADER_STAGE_COMPUTE_BIT;
283 } 284 }
284 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); 285 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
@@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
685 return {}; 686 return {};
686} 687}
687 688
689VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) {
690 switch (polygon_mode) {
691 case Maxwell::PolygonMode::Point:
692 return VK_POLYGON_MODE_POINT;
693 case Maxwell::PolygonMode::Line:
694 return VK_POLYGON_MODE_LINE;
695 case Maxwell::PolygonMode::Fill:
696 return VK_POLYGON_MODE_FILL;
697 }
698 UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode);
699 return {};
700}
701
688VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { 702VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
689 switch (swizzle) { 703 switch (swizzle) {
690 case Tegra::Texture::SwizzleSource::Zero: 704 case Tegra::Texture::SwizzleSource::Zero:
@@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
741 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; 755 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
742} 756}
743 757
758VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
759 switch (msaa_mode) {
760 case Tegra::Texture::MsaaMode::Msaa1x1:
761 return VK_SAMPLE_COUNT_1_BIT;
762 case Tegra::Texture::MsaaMode::Msaa2x1:
763 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
764 return VK_SAMPLE_COUNT_2_BIT;
765 case Tegra::Texture::MsaaMode::Msaa2x2:
766 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
767 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
768 return VK_SAMPLE_COUNT_4_BIT;
769 case Tegra::Texture::MsaaMode::Msaa4x2:
770 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
771 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
772 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
773 return VK_SAMPLE_COUNT_8_BIT;
774 case Tegra::Texture::MsaaMode::Msaa4x4:
775 return VK_SAMPLE_COUNT_16_BIT;
776 default:
777 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
778 return VK_SAMPLE_COUNT_1_BIT;
779 }
780}
781
744} // namespace Vulkan::MaxwellToVK 782} // namespace Vulkan::MaxwellToVK
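For context, the new MsaaMode() helper collapses the Maxwell sample layouts onto the nearest Vulkan sample count (for example, the 2x2 variants all map to VK_SAMPLE_COUNT_4_BIT). A hedged sketch of how a caller might feed it into multisample state when building a graphics pipeline; the key.state.msaa_mode field name is an assumption chosen for illustration:

    // Illustrative pipeline-creation snippet; most fields are left at defaults.
    const VkSampleCountFlagBits samples = MaxwellToVK::MsaaMode(key.state.msaa_mode);
    const VkPipelineMultisampleStateCreateInfo multisample_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .rasterizationSamples = samples,
        .sampleShadingEnable = VK_FALSE,
        .minSampleShading = 0.0f,
        .pSampleMask = nullptr,
        .alphaToCoverageEnable = VK_FALSE,
        .alphaToOneEnable = VK_FALSE,
    };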
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..8a9616039 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "shader_recompiler/stage.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h" 10#include "video_core/surface.h"
10#include "video_core/textures/texture.h" 11#include "video_core/textures/texture.h"
@@ -45,7 +46,7 @@ struct FormatInfo {
45[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, 46[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
46 PixelFormat pixel_format); 47 PixelFormat pixel_format);
47 48
48VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); 49VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
49 50
50VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); 51VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
51 52
@@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face);
65 66
66VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); 67VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
67 68
69VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
70
68VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); 71VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
69 72
70VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); 73VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
71 74
72VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); 75VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
73 76
77VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
78
74} // namespace Vulkan::MaxwellToVK 79} // namespace Vulkan::MaxwellToVK
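With ShaderStage() now taking Shader::Stage from the shader recompiler, stage-create-info construction no longer goes through Tegra::Engines::ShaderType. A small sketch of the intended call site; the spv_module handle is assumed to exist in the caller:

    // Illustrative: build one shader stage from a recompiled SPIR-V module.
    const VkPipelineShaderStageCreateInfo stage_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stage = MaxwellToVK::ShaderStage(Shader::Stage::Fragment),
        .module = *spv_module, // vk::ShaderModule owned by the caller (assumed)
        .pName = "main",
        .pSpecializationInfo = nullptr,
    };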
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 000000000..4847db6b6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,154 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include <boost/container/small_vector.hpp>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/renderer_vulkan/vk_texture_cache.h"
15#include "video_core/renderer_vulkan/vk_update_descriptor.h"
16#include "video_core/texture_cache/texture_cache.h"
17#include "video_core/texture_cache/types.h"
18#include "video_core/textures/texture.h"
19#include "video_core/vulkan_common/vulkan_device.h"
20
21namespace Vulkan {
22
23class DescriptorLayoutBuilder {
24public:
25 DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
26
27 bool CanUsePushDescriptor() const noexcept {
28 return device->IsKhrPushDescriptorSupported() &&
29 num_descriptors <= device->MaxPushDescriptors();
30 }
31
32 vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
33 if (bindings.empty()) {
34 return nullptr;
35 }
36 const VkDescriptorSetLayoutCreateFlags flags =
37 use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
38 return device->GetLogical().CreateDescriptorSetLayout({
39 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
40 .pNext = nullptr,
41 .flags = flags,
42 .bindingCount = static_cast<u32>(bindings.size()),
43 .pBindings = bindings.data(),
44 });
45 }
46
47 vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
48 VkPipelineLayout pipeline_layout,
49 bool use_push_descriptor) const {
50 if (entries.empty()) {
51 return nullptr;
52 }
53 const VkDescriptorUpdateTemplateType type =
54 use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
55 : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
56 return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
57 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
58 .pNext = nullptr,
59 .flags = 0,
60 .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
61 .pDescriptorUpdateEntries = entries.data(),
62 .templateType = type,
63 .descriptorSetLayout = descriptor_set_layout,
64 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
65 .pipelineLayout = pipeline_layout,
66 .set = 0,
67 });
68 }
69
70 vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
71 return device->GetLogical().CreatePipelineLayout({
72 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
73 .pNext = nullptr,
74 .flags = 0,
75 .setLayoutCount = descriptor_set_layout ? 1U : 0U,
76 .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
77 .pushConstantRangeCount = 0,
78 .pPushConstantRanges = nullptr,
79 });
80 }
81
82 void Add(const Shader::Info& info, VkShaderStageFlags stage) {
83 Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
84 Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
85 Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
86 Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
87 Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
88 Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
89 }
90
91private:
92 template <typename Descriptors>
93 void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
94 const size_t num{descriptors.size()};
95 for (size_t i = 0; i < num; ++i) {
96 bindings.push_back({
97 .binding = binding,
98 .descriptorType = type,
99 .descriptorCount = descriptors[i].count,
100 .stageFlags = stage,
101 .pImmutableSamplers = nullptr,
102 });
103 entries.push_back({
104 .dstBinding = binding,
105 .dstArrayElement = 0,
106 .descriptorCount = descriptors[i].count,
107 .descriptorType = type,
108 .offset = offset,
109 .stride = sizeof(DescriptorUpdateEntry),
110 });
111 ++binding;
112 num_descriptors += descriptors[i].count;
113 offset += sizeof(DescriptorUpdateEntry);
114 }
115 }
116
117 const Device* device{};
118 boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
119 boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
120 u32 binding{};
121 u32 num_descriptors{};
122 size_t offset{};
123};
124
125inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
126 const ImageId*& image_view_ids, TextureCache& texture_cache,
127 VKUpdateDescriptorQueue& update_descriptor_queue) {
128 for (const auto& desc : info.texture_buffer_descriptors) {
129 image_view_ids += desc.count;
130 }
131 for (const auto& desc : info.image_buffer_descriptors) {
132 image_view_ids += desc.count;
133 }
134 for (const auto& desc : info.texture_descriptors) {
135 for (u32 index = 0; index < desc.count; ++index) {
136 const VkSampler sampler{*(samplers++)};
137 ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
138 const VkImageView vk_image_view{image_view.Handle(desc.type)};
139 update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
140 }
141 }
142 for (const auto& desc : info.image_descriptors) {
143 for (u32 index = 0; index < desc.count; ++index) {
144 ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
145 if (desc.is_written) {
146 texture_cache.MarkModification(image_view.image_id);
147 }
148 const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
149 update_descriptor_queue.AddImage(vk_image_view);
150 }
151 }
152}
153
154} // namespace Vulkan
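DescriptorLayoutBuilder is meant to be driven once per pipeline: feed it the Shader::Info of every active stage, then derive the set layout, pipeline layout and update template from the accumulated bindings. A hedged usage sketch; vertex_info and fragment_info stand in for whatever Shader::Info objects the caller already has:

    DescriptorLayoutBuilder builder{device};
    builder.Add(vertex_info, VK_SHADER_STAGE_VERTEX_BIT);
    builder.Add(fragment_info, VK_SHADER_STAGE_FRAGMENT_BIT);

    const bool use_push = builder.CanUsePushDescriptor();
    vk::DescriptorSetLayout set_layout = builder.CreateDescriptorSetLayout(use_push);
    vk::PipelineLayout pipeline_layout = builder.CreatePipelineLayout(*set_layout);
    vk::DescriptorUpdateTemplateKHR update_template =
        builder.CreateTemplate(*set_layout, *pipeline_layout, use_push);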
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index bec3a81d9..a8d04dc61 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
130 if (!framebuffer) { 130 if (!framebuffer) {
131 return; 131 return;
132 } 132 }
133 const auto& layout = render_window.GetFramebufferLayout(); 133 SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
134 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 134 if (!render_window.IsShown()) {
135 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 135 return;
136 const bool use_accelerated = 136 }
137 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 137 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
138 const bool is_srgb = use_accelerated && screen_info.is_srgb; 138 const bool use_accelerated =
139 if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { 139 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
140 swapchain.Create(layout.width, layout.height, is_srgb); 140 const bool is_srgb = use_accelerated && screen_info.is_srgb;
141 blit_screen.Recreate(); 141
142 } 142 bool has_been_recreated = false;
143 143 const auto recreate_swapchain = [&] {
144 scheduler.WaitWorker(); 144 if (!has_been_recreated) {
145 145 has_been_recreated = true;
146 while (!swapchain.AcquireNextImage()) { 146 scheduler.WaitWorker();
147 swapchain.Create(layout.width, layout.height, is_srgb);
148 blit_screen.Recreate();
149 } 147 }
150 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); 148 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
151 149 swapchain.Create(layout.width, layout.height, is_srgb);
152 scheduler.Flush(render_semaphore); 150 };
153 151 if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) {
154 if (swapchain.Present(render_semaphore)) { 152 recreate_swapchain();
155 blit_screen.Recreate(); 153 }
154 bool is_outdated;
155 do {
156 swapchain.AcquireNextImage();
157 is_outdated = swapchain.IsOutDated();
158 if (is_outdated) {
159 recreate_swapchain();
156 } 160 }
157 gpu.RendererFrameEndNotify(); 161 } while (is_outdated);
158 rasterizer.TickFrame(); 162 if (has_been_recreated) {
163 blit_screen.Recreate();
159 } 164 }
165 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
166 scheduler.Flush(render_semaphore);
167 scheduler.WaitWorker();
168 swapchain.Present(render_semaphore);
160 169
161 render_window.OnFrameDisplayed(); 170 gpu.RendererFrameEndNotify();
171 rasterizer.TickFrame();
162} 172}
163 173
164void RendererVulkan::Report() const { 174void RendererVulkan::Report() const {
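The SwapBuffers rewrite replaces the old single retry with an explicit recreate-until-valid loop: the swapchain is rebuilt at most once per frame (guarded by has_been_recreated), images are re-acquired until the swapchain is no longer out of date, and the window notification always runs thanks to the SCOPE_EXIT at the top. In raw Vulkan terms, outside the wrapper types used above, the acquire/recreate pattern corresponds roughly to the sketch below; device, swapchain and image_available are assumed handles and RecreateSwapchain is a hypothetical helper:

    // Plain-Vulkan sketch of the acquire/recreate loop; synchronization and
    // error handling other than OUT_OF_DATE are omitted for brevity.
    uint32_t image_index = 0;
    VkResult result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
                                            image_available, VK_NULL_HANDLE, &image_index);
    while (result == VK_ERROR_OUT_OF_DATE_KHR) {
        RecreateSwapchain(); // hypothetical: waits for the GPU, rebuilds swapchain and framebuffers
        result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
                                       image_available, VK_NULL_HANDLE, &image_index);
    }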
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 363134129..516f428e7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
184 .depth = 1, 184 .depth = 1,
185 }, 185 },
186 }; 186 };
187 scheduler.Record( 187 scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
188 [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { 188 const VkImage image = *raw_images[image_index];
189 const VkImageMemoryBarrier base_barrier{ 189 const VkImageMemoryBarrier base_barrier{
190 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 190 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
191 .pNext = nullptr, 191 .pNext = nullptr,
192 .srcAccessMask = 0, 192 .srcAccessMask = 0,
193 .dstAccessMask = 0, 193 .dstAccessMask = 0,
194 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 194 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
195 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 195 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
196 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 196 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
197 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 197 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
198 .image = image, 198 .image = image,
199 .subresourceRange = 199 .subresourceRange{
200 { 200 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
201 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 201 .baseMipLevel = 0,
202 .baseMipLevel = 0, 202 .levelCount = 1,
203 .levelCount = 1, 203 .baseArrayLayer = 0,
204 .baseArrayLayer = 0, 204 .layerCount = 1,
205 .layerCount = 1, 205 },
206 }, 206 };
207 }; 207 VkImageMemoryBarrier read_barrier = base_barrier;
208 VkImageMemoryBarrier read_barrier = base_barrier; 208 read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
209 read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; 209 read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
210 read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 210 read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
211 read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; 211
212 212 VkImageMemoryBarrier write_barrier = base_barrier;
213 VkImageMemoryBarrier write_barrier = base_barrier; 213 write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
214 write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 214 write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
215 write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; 215
216 216 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
217 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 217 read_barrier);
218 0, read_barrier); 218 cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
219 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); 219 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
220 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, 220 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
221 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); 221 });
222 });
223 } 222 }
224 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], 223 scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
225 descriptor_set = descriptor_sets[image_index], buffer = *buffer,
226 size = swapchain.GetSize(), pipeline = *pipeline,
227 layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
228 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; 224 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
229 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; 225 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
230 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; 226 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
@@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
234 const VkRenderPassBeginInfo renderpass_bi{ 230 const VkRenderPassBeginInfo renderpass_bi{
235 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 231 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
236 .pNext = nullptr, 232 .pNext = nullptr,
237 .renderPass = renderpass, 233 .renderPass = *renderpass,
238 .framebuffer = framebuffer, 234 .framebuffer = *framebuffers[image_index],
239 .renderArea = 235 .renderArea =
240 { 236 {
241 .offset = {0, 0}, 237 .offset = {0, 0},
@@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
257 .extent = size, 253 .extent = size,
258 }; 254 };
259 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 255 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
260 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 256 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
261 cmdbuf.SetViewport(0, viewport); 257 cmdbuf.SetViewport(0, viewport);
262 cmdbuf.SetScissor(0, scissor); 258 cmdbuf.SetScissor(0, scissor);
263 259
264 cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); 260 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
265 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); 261 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
262 descriptor_sets[image_index], {});
266 cmdbuf.Draw(4, 1, 0, 0); 263 cmdbuf.Draw(4, 1, 0, 0);
267 cmdbuf.EndRenderPass(); 264 cmdbuf.EndRenderPass();
268 }); 265 });
@@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() {
304 301
305void VKBlitScreen::CreateSemaphores() { 302void VKBlitScreen::CreateSemaphores() {
306 semaphores.resize(image_count); 303 semaphores.resize(image_count);
307 std::generate(semaphores.begin(), semaphores.end(), 304 std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
308 [this] { return device.GetLogical().CreateSemaphore(); });
309} 305}
310 306
311void VKBlitScreen::CreateDescriptorPool() { 307void VKBlitScreen::CreateDescriptorPool() {
@@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() {
633} 629}
634 630
635void VKBlitScreen::ReleaseRawImages() { 631void VKBlitScreen::ReleaseRawImages() {
636 for (std::size_t i = 0; i < raw_images.size(); ++i) { 632 for (const u64 tick : resource_ticks) {
637 scheduler.Wait(resource_ticks.at(i)); 633 scheduler.Wait(tick);
638 } 634 }
639 raw_images.clear(); 635 raw_images.clear();
640 raw_buffer_commits.clear(); 636 raw_buffer_commits.clear();
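The blit-screen changes mostly simplify the deferred-recording lambdas: instead of copying every handle into the capture list, the lambdas capture this plus the per-frame index and dereference the members when the command buffer is actually recorded. A generic sketch of that pattern, assuming pipeline and buffer are members of the capturing class; the trade-off is that the object must outlive the recorded work, which is what the resource_ticks wait in ReleaseRawImages covers:

    // Deferred recording: capture `this` and an index, resolve handles later.
    scheduler.Record([this, image_index](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
        cmdbuf.BindVertexBuffer(0, *buffer, 0);
        cmdbuf.Draw(4, 1, 0, 0);
    });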
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 0def1e769..f4b3ee95c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
60 } 60 }
61 return indices; 61 return indices;
62} 62}
63} // Anonymous namespace
64
65Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
66 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
67 63
68Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 64vk::Buffer CreateBuffer(const Device& device, u64 size) {
69 VAddr cpu_addr_, u64 size_bytes_) 65 VkBufferUsageFlags flags =
70 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { 66 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
71 buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 67 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
68 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
69 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
70 if (device.IsExtTransformFeedbackSupported()) {
71 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
72 }
73 return device.GetLogical().CreateBuffer({
72 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 74 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
73 .pNext = nullptr, 75 .pNext = nullptr,
74 .flags = 0, 76 .flags = 0,
75 .size = SizeBytes(), 77 .size = size,
76 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 78 .usage = flags,
77 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
78 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
79 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
80 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
81 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 79 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
82 .queueFamilyIndexCount = 0, 80 .queueFamilyIndexCount = 0,
83 .pQueueFamilyIndices = nullptr, 81 .pQueueFamilyIndices = nullptr,
84 }); 82 });
83}
84} // Anonymous namespace
85
86Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
87 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
88
89Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
90 VAddr cpu_addr_, u64 size_bytes_)
91 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
92 device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
93 commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
85 if (runtime.device.HasDebuggingToolAttached()) { 94 if (runtime.device.HasDebuggingToolAttached()) {
86 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); 95 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
87 } 96 }
88 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); 97}
98
99VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
100 if (!device) {
101 // Null buffer, return a null descriptor
102 return VK_NULL_HANDLE;
103 }
104 const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
105 return offset == view.offset && size == view.size && format == view.format;
106 })};
107 if (it != views.end()) {
108 return *it->handle;
109 }
110 views.push_back({
111 .offset = offset,
112 .size = size,
113 .format = format,
114 .handle = device->GetLogical().CreateBufferView({
115 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
116 .pNext = nullptr,
117 .flags = 0,
118 .buffer = *buffer,
119 .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
120 .offset = offset,
121 .range = size,
122 }),
123 });
124 return *views.back().handle;
89} 125}
90 126
91BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, 127BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
92 VKScheduler& scheduler_, StagingBufferPool& staging_pool_, 128 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
93 VKUpdateDescriptorQueue& update_descriptor_queue_, 129 VKUpdateDescriptorQueue& update_descriptor_queue_,
94 VKDescriptorPool& descriptor_pool) 130 DescriptorPool& descriptor_pool)
95 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, 131 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
96 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, 132 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
97 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 133 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3bb81d5b3..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -9,13 +9,14 @@
9#include "video_core/renderer_vulkan/vk_compute_pass.h" 9#include "video_core/renderer_vulkan/vk_compute_pass.h"
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/renderer_vulkan/vk_update_descriptor.h" 11#include "video_core/renderer_vulkan/vk_update_descriptor.h"
12#include "video_core/surface.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
14 15
15namespace Vulkan { 16namespace Vulkan {
16 17
17class Device; 18class Device;
18class VKDescriptorPool; 19class DescriptorPool;
19class VKScheduler; 20class VKScheduler;
20 21
21class BufferCacheRuntime; 22class BufferCacheRuntime;
@@ -26,6 +27,8 @@ public:
26 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 27 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27 VAddr cpu_addr_, u64 size_bytes_); 28 VAddr cpu_addr_, u64 size_bytes_);
28 29
30 [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
31
29 [[nodiscard]] VkBuffer Handle() const noexcept { 32 [[nodiscard]] VkBuffer Handle() const noexcept {
30 return *buffer; 33 return *buffer;
31 } 34 }
@@ -35,8 +38,17 @@ public:
35 } 38 }
36 39
37private: 40private:
41 struct BufferView {
42 u32 offset;
43 u32 size;
44 VideoCore::Surface::PixelFormat format;
45 vk::BufferView handle;
46 };
47
48 const Device* device{};
38 vk::Buffer buffer; 49 vk::Buffer buffer;
39 MemoryCommit commit; 50 MemoryCommit commit;
51 std::vector<BufferView> views;
40}; 52};
41 53
42class BufferCacheRuntime { 54class BufferCacheRuntime {
@@ -49,7 +61,7 @@ public:
49 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, 61 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
50 VKScheduler& scheduler_, StagingBufferPool& staging_pool_, 62 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
51 VKUpdateDescriptorQueue& update_descriptor_queue_, 63 VKUpdateDescriptorQueue& update_descriptor_queue_,
52 VKDescriptorPool& descriptor_pool); 64 DescriptorPool& descriptor_pool);
53 65
54 void Finish(); 66 void Finish();
55 67
@@ -87,6 +99,11 @@ public:
87 BindBuffer(buffer, offset, size); 99 BindBuffer(buffer, offset, size);
88 } 100 }
89 101
102 void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
103 VideoCore::Surface::PixelFormat format) {
104 update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
105 }
106
90private: 107private:
91 void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { 108 void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
92 update_descriptor_queue.AddBuffer(buffer, offset, size); 109 update_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -124,6 +141,7 @@ struct BufferCacheParams {
124 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; 141 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
125 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 142 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
126 static constexpr bool USE_MEMORY_MAPS = true; 143 static constexpr bool USE_MEMORY_MAPS = true;
144 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
127}; 145};
128 146
129using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 147using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
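The new View()/BindTextureBuffer() pair gives each Buffer a small cache of VkBufferView objects keyed on (offset, size, format), so repeated texel-buffer bindings over the same range reuse the existing view instead of creating one per draw. A hedged usage sketch; the offset and size values are placeholders chosen for illustration:

    // Illustrative: bind a texel buffer through the runtime; Buffer::View()
    // either returns the cached VkBufferView or creates and caches a new one.
    void BindExampleTexelBuffer(BufferCacheRuntime& runtime, Buffer& buffer,
                                VideoCore::Surface::PixelFormat format) {
        constexpr u32 offset = 0;
        constexpr u32 size = 64 * 1024;
        runtime.BindTextureBuffer(buffer, offset, size, format);
    }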
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 4181d83ee..8e426ce2c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
41constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; 41constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
42constexpr size_t ASTC_NUM_BINDINGS = 4; 42constexpr size_t ASTC_NUM_BINDINGS = 4;
43 43
44VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 44template <size_t size>
45 return { 45inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
46 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 46 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
47 .offset = 0, 47 .offset = 0,
48 .size = static_cast<u32>(size), 48 .size = static_cast<u32>(size),
49 }; 49};
50}
51
52std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
53 return {{
54 {
55 .binding = 0,
56 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
57 .descriptorCount = 1,
58 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
59 .pImmutableSamplers = nullptr,
60 },
61 {
62 .binding = 1,
63 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
64 .descriptorCount = 1,
65 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
66 .pImmutableSamplers = nullptr,
67 },
68 }};
69}
70 50
71std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { 51constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
72 return {{ 52 {
73 { 53 .binding = 0,
74 .binding = ASTC_BINDING_INPUT_BUFFER, 54 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
75 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 55 .descriptorCount = 1,
76 .descriptorCount = 1, 56 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
77 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 57 .pImmutableSamplers = nullptr,
78 .pImmutableSamplers = nullptr, 58 },
79 }, 59 {
80 { 60 .binding = 1,
81 .binding = ASTC_BINDING_ENC_BUFFER, 61 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
82 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 62 .descriptorCount = 1,
83 .descriptorCount = 1, 63 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
84 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 64 .pImmutableSamplers = nullptr,
85 .pImmutableSamplers = nullptr, 65 },
86 }, 66}};
87 { 67
88 .binding = ASTC_BINDING_SWIZZLE_BUFFER, 68constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
89 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 69 .uniform_buffers = 0,
90 .descriptorCount = 1, 70 .storage_buffers = 2,
91 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 71 .texture_buffers = 0,
92 .pImmutableSamplers = nullptr, 72 .image_buffers = 0,
93 }, 73 .textures = 0,
94 { 74 .images = 0,
95 .binding = ASTC_BINDING_OUTPUT_IMAGE, 75 .score = 2,
96 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 76};
97 .descriptorCount = 1,
98 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
99 .pImmutableSamplers = nullptr,
100 },
101 }};
102}
103 77
104VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { 78constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
105 return { 79 {
106 .dstBinding = 0, 80 .binding = ASTC_BINDING_INPUT_BUFFER,
107 .dstArrayElement = 0,
108 .descriptorCount = 2,
109 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 81 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
110 .offset = 0, 82 .descriptorCount = 1,
111 .stride = sizeof(DescriptorUpdateEntry), 83 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
112 }; 84 .pImmutableSamplers = nullptr,
113} 85 },
86 {
87 .binding = ASTC_BINDING_ENC_BUFFER,
88 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
89 .descriptorCount = 1,
90 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
91 .pImmutableSamplers = nullptr,
92 },
93 {
94 .binding = ASTC_BINDING_SWIZZLE_BUFFER,
95 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
96 .descriptorCount = 1,
97 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98 .pImmutableSamplers = nullptr,
99 },
100 {
101 .binding = ASTC_BINDING_OUTPUT_IMAGE,
102 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
103 .descriptorCount = 1,
104 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
105 .pImmutableSamplers = nullptr,
106 },
107}};
108
109constexpr DescriptorBankInfo ASTC_BANK_INFO{
110 .uniform_buffers = 0,
111 .storage_buffers = 3,
112 .texture_buffers = 0,
113 .image_buffers = 0,
114 .textures = 0,
115 .images = 1,
116 .score = 4,
117};
114 118
115std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> 119constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
116BuildASTCPassDescriptorUpdateTemplateEntry() { 120 .dstBinding = 0,
117 return {{ 121 .dstArrayElement = 0,
122 .descriptorCount = 2,
123 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
124 .offset = 0,
125 .stride = sizeof(DescriptorUpdateEntry),
126};
127
128constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
129 ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
118 { 130 {
119 .dstBinding = ASTC_BINDING_INPUT_BUFFER, 131 .dstBinding = ASTC_BINDING_INPUT_BUFFER,
120 .dstArrayElement = 0, 132 .dstArrayElement = 0,
@@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
148 .stride = sizeof(DescriptorUpdateEntry), 160 .stride = sizeof(DescriptorUpdateEntry),
149 }, 161 },
150 }}; 162 }};
151}
152 163
153struct AstcPushConstants { 164struct AstcPushConstants {
154 std::array<u32, 2> blocks_dims; 165 std::array<u32, 2> blocks_dims;
@@ -159,14 +170,14 @@ struct AstcPushConstants {
159 u32 block_height; 170 u32 block_height;
160 u32 block_height_mask; 171 u32 block_height_mask;
161}; 172};
162
163} // Anonymous namespace 173} // Anonymous namespace
164 174
165VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 175ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
166 vk::Span<VkDescriptorSetLayoutBinding> bindings, 176 vk::Span<VkDescriptorSetLayoutBinding> bindings,
167 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 177 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
168 vk::Span<VkPushConstantRange> push_constants, 178 const DescriptorBankInfo& bank_info,
169 std::span<const u32> code) { 179 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
180 : device{device_} {
170 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ 181 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
171 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 182 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
172 .pNext = nullptr, 183 .pNext = nullptr,
@@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
196 .pipelineLayout = *layout, 207 .pipelineLayout = *layout,
197 .set = 0, 208 .set = 0,
198 }); 209 });
199 210 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
200 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
201 } 211 }
202 module = device.GetLogical().CreateShaderModule({ 212 module = device.GetLogical().CreateShaderModule({
203 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 213 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
206 .codeSize = static_cast<u32>(code.size_bytes()), 216 .codeSize = static_cast<u32>(code.size_bytes()),
207 .pCode = code.data(), 217 .pCode = code.data(),
208 }); 218 });
219 device.SaveShader(code);
209 pipeline = device.GetLogical().CreateComputePipeline({ 220 pipeline = device.GetLogical().CreateComputePipeline({
210 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 221 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
211 .pNext = nullptr, 222 .pNext = nullptr,
212 .flags = 0, 223 .flags = 0,
213 .stage = 224 .stage{
214 { 225 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
215 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 226 .pNext = nullptr,
216 .pNext = nullptr, 227 .flags = 0,
217 .flags = 0, 228 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 229 .module = *module,
219 .module = *module, 230 .pName = "main",
220 .pName = "main", 231 .pSpecializationInfo = nullptr,
221 .pSpecializationInfo = nullptr, 232 },
222 },
223 .layout = *layout, 233 .layout = *layout,
224 .basePipelineHandle = nullptr, 234 .basePipelineHandle = nullptr,
225 .basePipelineIndex = 0, 235 .basePipelineIndex = 0,
226 }); 236 });
227} 237}
228 238
229VKComputePass::~VKComputePass() = default; 239ComputePass::~ComputePass() = default;
230 240
231VkDescriptorSet VKComputePass::CommitDescriptorSet( 241Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_,
232 VKUpdateDescriptorQueue& update_descriptor_queue) { 242 DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
233 if (!descriptor_template) {
234 return nullptr;
235 }
236 const VkDescriptorSet set = descriptor_allocator->Commit();
237 update_descriptor_queue.Send(*descriptor_template, set);
238 return set;
239}
240
241Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
242 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
243 VKUpdateDescriptorQueue& update_descriptor_queue_) 243 VKUpdateDescriptorQueue& update_descriptor_queue_)
244 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), 244 : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
245 BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), 245 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
246 VULKAN_UINT8_COMP_SPV),
246 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 247 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
247 update_descriptor_queue{update_descriptor_queue_} {} 248 update_descriptor_queue{update_descriptor_queue_} {}
248 249
@@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
256 update_descriptor_queue.Acquire(); 257 update_descriptor_queue.Acquire();
257 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 258 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
258 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); 259 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
259 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 260 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
261 const VkBuffer buffer{staging.buffer};
260 262
261 scheduler.RequestOutsideRenderPassOperationContext(); 263 scheduler.RequestOutsideRenderPassOperationContext();
262 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 264 scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
263 num_vertices](vk::CommandBuffer cmdbuf) {
264 static constexpr u32 DISPATCH_SIZE = 1024; 265 static constexpr u32 DISPATCH_SIZE = 1024;
265 static constexpr VkMemoryBarrier WRITE_BARRIER{ 266 static constexpr VkMemoryBarrier WRITE_BARRIER{
266 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 267 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
268 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 269 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
269 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 270 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
270 }; 271 };
271 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 272 const VkDescriptorSet set = descriptor_allocator.Commit();
272 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 273 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
274 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
275 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
273 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); 276 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
274 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 277 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
275 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); 278 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
@@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
278} 281}
279 282
280QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 283QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
281 VKDescriptorPool& descriptor_pool_, 284 DescriptorPool& descriptor_pool_,
282 StagingBufferPool& staging_buffer_pool_, 285 StagingBufferPool& staging_buffer_pool_,
283 VKUpdateDescriptorQueue& update_descriptor_queue_) 286 VKUpdateDescriptorQueue& update_descriptor_queue_)
284 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 287 : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
285 BuildInputOutputDescriptorUpdateTemplate(), 288 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
286 BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), 289 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
287 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 290 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
288 update_descriptor_queue{update_descriptor_queue_} {} 291 update_descriptor_queue{update_descriptor_queue_} {}
289 292
@@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
313 update_descriptor_queue.Acquire(); 316 update_descriptor_queue.Acquire();
314 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 317 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
315 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); 318 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
316 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 319 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
317 320
318 scheduler.RequestOutsideRenderPassOperationContext(); 321 scheduler.RequestOutsideRenderPassOperationContext();
319 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 322 scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
320 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 323 index_shift](vk::CommandBuffer cmdbuf) {
321 static constexpr u32 DISPATCH_SIZE = 1024; 324 static constexpr u32 DISPATCH_SIZE = 1024;
322 static constexpr VkMemoryBarrier WRITE_BARRIER{ 325 static constexpr VkMemoryBarrier WRITE_BARRIER{
323 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 326 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
325 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 328 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
326 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 329 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
327 }; 330 };
328 const std::array push_constants = {base_vertex, index_shift}; 331 const std::array push_constants{base_vertex, index_shift};
329 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 332 const VkDescriptorSet set = descriptor_allocator.Commit();
330 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 333 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
331 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 334 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
335 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
336 cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
332 &push_constants); 337 &push_constants);
333 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); 338 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
334 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 339 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
338} 343}
339 344
340ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, 345ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
341 VKDescriptorPool& descriptor_pool_, 346 DescriptorPool& descriptor_pool_,
342 StagingBufferPool& staging_buffer_pool_, 347 StagingBufferPool& staging_buffer_pool_,
343 VKUpdateDescriptorQueue& update_descriptor_queue_, 348 VKUpdateDescriptorQueue& update_descriptor_queue_,
344 MemoryAllocator& memory_allocator_) 349 MemoryAllocator& memory_allocator_)
345 : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), 350 : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
346 BuildASTCPassDescriptorUpdateTemplateEntry(), 351 ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
347 BuildComputePushConstantRange(sizeof(AstcPushConstants)), 352 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
348 ASTC_DECODER_COMP_SPV), 353 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
349 device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
350 update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} 354 update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
351 355
352ASTCDecoderPass::~ASTCDecoderPass() = default; 356ASTCDecoderPass::~ASTCDecoderPass() = default;
@@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
444 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), 448 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
445 sizeof(SWIZZLE_TABLE)); 449 sizeof(SWIZZLE_TABLE));
446 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); 450 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
447 451 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
448 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
449 const VkPipelineLayout vk_layout = *layout;
450 452
451 // To unswizzle the ASTC data 453 // To unswizzle the ASTC data
452 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); 454 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
453 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 455 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
454 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 456 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
455 scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, 457 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
456 block_dims, params, set](vk::CommandBuffer cmdbuf) { 458 params, descriptor_data](vk::CommandBuffer cmdbuf) {
457 const AstcPushConstants uniforms{ 459 const AstcPushConstants uniforms{
458 .blocks_dims = block_dims, 460 .blocks_dims = block_dims,
459 .bytes_per_block_log2 = params.bytes_per_block_log2, 461 .bytes_per_block_log2 = params.bytes_per_block_log2,
@@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
463 .block_height = params.block_height, 465 .block_height = params.block_height,
464 .block_height_mask = params.block_height_mask, 466 .block_height_mask = params.block_height_mask,
465 }; 467 };
466 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); 468 const VkDescriptorSet set = descriptor_allocator.Commit();
467 cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); 469 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
470 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
471 cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
468 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); 472 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
469 }); 473 });
470 } 474 }
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5ea187c30..114aef2bd 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <optional>
8#include <span> 7#include <span>
9#include <utility> 8#include <utility>
10 9
@@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue;
27class Image; 26class Image;
28struct StagingBufferRef; 27struct StagingBufferRef;
29 28
30class VKComputePass { 29class ComputePass {
31public: 30public:
32 explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 31 explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
33 vk::Span<VkDescriptorSetLayoutBinding> bindings, 32 vk::Span<VkDescriptorSetLayoutBinding> bindings,
34 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 33 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); 34 const DescriptorBankInfo& bank_info,
36 ~VKComputePass(); 35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
36 ~ComputePass();
37 37
38protected: 38protected:
39 VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); 39 const Device& device;
40
41 vk::DescriptorUpdateTemplateKHR descriptor_template; 40 vk::DescriptorUpdateTemplateKHR descriptor_template;
42 vk::PipelineLayout layout; 41 vk::PipelineLayout layout;
43 vk::Pipeline pipeline; 42 vk::Pipeline pipeline;
43 vk::DescriptorSetLayout descriptor_set_layout;
44 DescriptorAllocator descriptor_allocator;
44 45
45private: 46private:
46 vk::DescriptorSetLayout descriptor_set_layout;
47 std::optional<DescriptorAllocator> descriptor_allocator;
48 vk::ShaderModule module; 47 vk::ShaderModule module;
49}; 48};
50 49
51class Uint8Pass final : public VKComputePass { 50class Uint8Pass final : public ComputePass {
52public: 51public:
53 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 52 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
54 VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, 53 DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
55 VKUpdateDescriptorQueue& update_descriptor_queue_); 54 VKUpdateDescriptorQueue& update_descriptor_queue_);
56 ~Uint8Pass(); 55 ~Uint8Pass();
57 56
@@ -66,10 +65,10 @@ private:
66 VKUpdateDescriptorQueue& update_descriptor_queue; 65 VKUpdateDescriptorQueue& update_descriptor_queue;
67}; 66};
68 67
69class QuadIndexedPass final : public VKComputePass { 68class QuadIndexedPass final : public ComputePass {
70public: 69public:
71 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 70 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
72 VKDescriptorPool& descriptor_pool_, 71 DescriptorPool& descriptor_pool_,
73 StagingBufferPool& staging_buffer_pool_, 72 StagingBufferPool& staging_buffer_pool_,
74 VKUpdateDescriptorQueue& update_descriptor_queue_); 73 VKUpdateDescriptorQueue& update_descriptor_queue_);
75 ~QuadIndexedPass(); 74 ~QuadIndexedPass();
@@ -84,10 +83,10 @@ private:
84 VKUpdateDescriptorQueue& update_descriptor_queue; 83 VKUpdateDescriptorQueue& update_descriptor_queue;
85}; 84};
86 85
87class ASTCDecoderPass final : public VKComputePass { 86class ASTCDecoderPass final : public ComputePass {
88public: 87public:
89 explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, 88 explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
90 VKDescriptorPool& descriptor_pool_, 89 DescriptorPool& descriptor_pool_,
91 StagingBufferPool& staging_buffer_pool_, 90 StagingBufferPool& staging_buffer_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_, 91 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 MemoryAllocator& memory_allocator_); 92 MemoryAllocator& memory_allocator_);
@@ -99,7 +98,6 @@ public:
99private: 98private:
100 void MakeDataBuffer(); 99 void MakeDataBuffer();
101 100
102 const Device& device;
103 VKScheduler& scheduler; 101 VKScheduler& scheduler;
104 StagingBufferPool& staging_buffer_pool; 102 StagingBufferPool& staging_buffer_pool;
105 VKUpdateDescriptorQueue& update_descriptor_queue; 103 VKUpdateDescriptorQueue& update_descriptor_queue;
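In the header above, descriptor_allocator changes from a private std::optional<DescriptorAllocator> to a protected member held by value; this relies on DescriptorAllocator being default-constructible and move-assignable so it can be reseated later (the pipeline code below does this with descriptor_pool.Allocator(...)). A toy sketch of that ownership pattern, with Allocator and Pass as illustrative types rather than the yuzu classes:

    // Value member that starts empty and is later replaced by move assignment,
    // removing the need for std::optional around it.
    #include <cassert>
    #include <string>
    #include <utility>

    class Allocator {
    public:
        Allocator() = default; // empty state, nothing allocated yet
        explicit Allocator(std::string pool) : pool_{std::move(pool)} {}

        Allocator(Allocator&&) noexcept = default;
        Allocator& operator=(Allocator&&) noexcept = default;

        bool Valid() const { return !pool_.empty(); }

    private:
        std::string pool_;
    };

    struct Pass {
        Allocator descriptor_allocator; // held by value, default-constructed
    };

    int main() {
        Pass pass;
        assert(!pass.descriptor_allocator.Valid());
        pass.descriptor_allocator = Allocator{"bank0"}; // reseated once the layout exists
        assert(pass.descriptor_allocator.Valid());
    }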
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..70b84c7a6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -2,152 +2,198 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <vector> 6#include <vector>
6 7
8#include <boost/container/small_vector.hpp>
9
10#include "video_core/renderer_vulkan/pipeline_helper.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h"
7#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 12#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 14#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 15#include "video_core/renderer_vulkan/vk_scheduler.h"
11#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
12#include "video_core/renderer_vulkan/vk_update_descriptor.h" 16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
17#include "video_core/shader_notify.h"
13#include "video_core/vulkan_common/vulkan_device.h" 18#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
15 20
16namespace Vulkan { 21namespace Vulkan {
17 22
18VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 23using Shader::ImageBufferDescriptor;
19 VKDescriptorPool& descriptor_pool_, 24using Tegra::Texture::TexturePair;
20 VKUpdateDescriptorQueue& update_descriptor_queue_, 25
21 const SPIRVShader& shader_) 26ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
22 : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, 27 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 descriptor_set_layout{CreateDescriptorSetLayout()}, 28 Common::ThreadWorker* thread_worker,
24 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 29 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
25 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, 30 vk::ShaderModule spv_module_)
26 descriptor_template{CreateDescriptorUpdateTemplate()}, 31 : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
27 shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} 32 spv_module(std::move(spv_module_)) {
28 33 if (shader_notify) {
29VKComputePipeline::~VKComputePipeline() = default; 34 shader_notify->MarkShaderBuilding();
30
31VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
32 if (!descriptor_template) {
33 return {};
34 }
35 const VkDescriptorSet set = descriptor_allocator.Commit();
36 update_descriptor_queue.Send(*descriptor_template, set);
37 return set;
38}
39
40vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
41 std::vector<VkDescriptorSetLayoutBinding> bindings;
42 u32 binding = 0;
43 const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
44 // TODO(Rodrigo): Maybe make individual bindings here?
45 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
46 bindings.push_back({
47 .binding = binding++,
48 .descriptorType = descriptor_type,
49 .descriptorCount = 1,
50 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
51 .pImmutableSamplers = nullptr,
52 });
53 }
54 };
55 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
56 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
57 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
58 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
59 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
60 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
61
62 return device.GetLogical().CreateDescriptorSetLayout({
63 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
64 .pNext = nullptr,
65 .flags = 0,
66 .bindingCount = static_cast<u32>(bindings.size()),
67 .pBindings = bindings.data(),
68 });
69}
70
71vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
72 return device.GetLogical().CreatePipelineLayout({
73 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
74 .pNext = nullptr,
75 .flags = 0,
76 .setLayoutCount = 1,
77 .pSetLayouts = descriptor_set_layout.address(),
78 .pushConstantRangeCount = 0,
79 .pPushConstantRanges = nullptr,
80 });
81}
82
83vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
84 std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
85 u32 binding = 0;
86 u32 offset = 0;
87 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
88 if (template_entries.empty()) {
89 // If the shader doesn't use descriptor sets, skip template creation.
90 return {};
91 } 35 }
92 36 std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
93 return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ 37 uniform_buffer_sizes.begin());
94 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 38
95 .pNext = nullptr, 39 auto func{[this, &descriptor_pool, shader_notify] {
96 .flags = 0, 40 DescriptorLayoutBuilder builder{device};
97 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), 41 builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
98 .pDescriptorUpdateEntries = template_entries.data(), 42
99 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, 43 descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
100 .descriptorSetLayout = *descriptor_set_layout, 44 pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
101 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 45 descriptor_update_template =
102 .pipelineLayout = *layout, 46 builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
103 .set = DESCRIPTOR_SET, 47 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
104 }); 48 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
105} 49 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
106 50 .pNext = nullptr,
107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { 51 .requiredSubgroupSize = GuestWarpSize,
108 device.SaveShader(code); 52 };
109 53 pipeline = device.GetLogical().CreateComputePipeline({
110 return device.GetLogical().CreateShaderModule({ 54 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
111 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 55 .pNext = nullptr,
112 .pNext = nullptr, 56 .flags = 0,
113 .flags = 0, 57 .stage{
114 .codeSize = code.size() * sizeof(u32),
115 .pCode = code.data(),
116 });
117}
118
119vk::Pipeline VKComputePipeline::CreatePipeline() const {
120
121 VkComputePipelineCreateInfo ci{
122 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
123 .pNext = nullptr,
124 .flags = 0,
125 .stage =
126 {
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 58 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128 .pNext = nullptr, 59 .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
129 .flags = 0, 60 .flags = 0,
130 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 61 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
131 .module = *shader_module, 62 .module = *spv_module,
132 .pName = "main", 63 .pName = "main",
133 .pSpecializationInfo = nullptr, 64 .pSpecializationInfo = nullptr,
134 }, 65 },
135 .layout = *layout, 66 .layout = *pipeline_layout,
136 .basePipelineHandle = nullptr, 67 .basePipelineHandle = 0,
137 .basePipelineIndex = 0, 68 .basePipelineIndex = 0,
138 }; 69 });
139 70 std::lock_guard lock{build_mutex};
140 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ 71 is_built = true;
141 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, 72 build_condvar.notify_one();
142 .pNext = nullptr, 73 if (shader_notify) {
143 .requiredSubgroupSize = GuestWarpSize, 74 shader_notify->MarkShaderComplete();
144 }; 75 }
145 76 }};
146 if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { 77 if (thread_worker) {
147 ci.stage.pNext = &subgroup_size_ci; 78 thread_worker->QueueWork(std::move(func));
79 } else {
80 func();
81 }
82}
83
84void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
85 Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
86 BufferCache& buffer_cache, TextureCache& texture_cache) {
87 update_descriptor_queue.Acquire();
88
89 buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
90 buffer_cache.UnbindComputeStorageBuffers();
91 size_t ssbo_index{};
92 for (const auto& desc : info.storage_buffers_descriptors) {
93 ASSERT(desc.count == 1);
94 buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
95 desc.is_written);
96 ++ssbo_index;
148 } 97 }
149 98
150 return device.GetLogical().CreateComputePipeline(ci); 99 texture_cache.SynchronizeComputeDescriptors();
100
101 static constexpr size_t max_elements = 64;
102 std::array<ImageId, max_elements> image_view_ids;
103 boost::container::static_vector<u32, max_elements> image_view_indices;
104 boost::container::static_vector<VkSampler, max_elements> samplers;
105
106 const auto& qmd{kepler_compute.launch_description};
107 const auto& cbufs{qmd.const_buffer_config};
108 const bool via_header_index{qmd.linked_tsc != 0};
109 const auto read_handle{[&](const auto& desc, u32 index) {
110 ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
111 const u32 index_offset{index << desc.size_shift};
112 const u32 offset{desc.cbuf_offset + index_offset};
113 const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
114 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
115 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
116 if (desc.has_secondary) {
117 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
118 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
119 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
120 secondary_offset};
121 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
122 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
123 return TexturePair(lhs_raw | rhs_raw, via_header_index);
124 }
125 }
126 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
127 }};
128 const auto add_image{[&](const auto& desc) {
129 for (u32 index = 0; index < desc.count; ++index) {
130 const auto handle{read_handle(desc, index)};
131 image_view_indices.push_back(handle.first);
132 }
133 }};
134 std::ranges::for_each(info.texture_buffer_descriptors, add_image);
135 std::ranges::for_each(info.image_buffer_descriptors, add_image);
136 for (const auto& desc : info.texture_descriptors) {
137 for (u32 index = 0; index < desc.count; ++index) {
138 const auto handle{read_handle(desc, index)};
139 image_view_indices.push_back(handle.first);
140
141 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
142 samplers.push_back(sampler->Handle());
143 }
144 }
145 std::ranges::for_each(info.image_descriptors, add_image);
146
147 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
148 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
149
150 buffer_cache.UnbindComputeTextureBuffers();
151 ImageId* texture_buffer_ids{image_view_ids.data()};
152 size_t index{};
153 const auto add_buffer{[&](const auto& desc) {
154 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
155 for (u32 i = 0; i < desc.count; ++i) {
156 bool is_written{false};
157 if constexpr (is_image) {
158 is_written = desc.is_written;
159 }
160 ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
161 buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
162 image_view.BufferSize(), image_view.format,
163 is_written, is_image);
164 ++texture_buffer_ids;
165 ++index;
166 }
167 }};
168 std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
169 std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
170
171 buffer_cache.UpdateComputeBuffers();
172 buffer_cache.BindHostComputeBuffers();
173
174 const VkSampler* samplers_it{samplers.data()};
175 const ImageId* views_it{image_view_ids.data()};
176 PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
177
178 if (!is_built.load(std::memory_order::relaxed)) {
179 // Wait for the pipeline to be built
180 scheduler.Record([this](vk::CommandBuffer) {
181 std::unique_lock lock{build_mutex};
182 build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
183 });
184 }
185 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
186 scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
187 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
188 if (!descriptor_set_layout) {
189 return;
190 }
191 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
192 const vk::Device& dev{device.GetLogical()};
193 dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
195 descriptor_set, nullptr);
196 });
151} 197}
152 198
153} // namespace Vulkan 199} // namespace Vulkan
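ComputePipeline now creates its Vulkan objects on an optional Common::ThreadWorker, and Configure() only blocks, via build_mutex and build_condvar, while is_built is still false. The handshake in isolation, as a runnable sketch with the expensive build elided (AsyncBuilt is a stand-in for the pipeline class):

    // Async-build handshake: the worker sets the flag under the mutex and
    // notifies; consumers take the lock only on the slow path.
    #include <atomic>
    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    struct AsyncBuilt {
        std::condition_variable build_condvar;
        std::mutex build_mutex;
        std::atomic_bool is_built{false};

        void Build() { // worker thread: pipeline creation would happen here
            std::lock_guard lock{build_mutex};
            is_built = true;
            build_condvar.notify_one();
        }

        void WaitUntilBuilt() { // caller that needs the pipeline bound
            if (is_built.load(std::memory_order_relaxed)) {
                return; // fast path, no locking once built
            }
            std::unique_lock lock{build_mutex};
            build_condvar.wait(lock, [this] { return is_built.load(std::memory_order_relaxed); });
        }
    };

    int main() {
        AsyncBuilt pipeline;
        std::thread worker{[&] { pipeline.Build(); }};
        pipeline.WaitUntilBuilt();
        std::printf("pipeline ready\n");
        worker.join();
    }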
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..52fec04d3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -4,61 +4,63 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
8#include <condition_variable>
9#include <mutex>
10
7#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/thread_worker.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/memory_manager.h"
15#include "video_core/renderer_vulkan/vk_buffer_cache.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 16#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 17#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
11 20
21namespace VideoCore {
22class ShaderNotify;
23}
24
12namespace Vulkan { 25namespace Vulkan {
13 26
14class Device; 27class Device;
15class VKScheduler; 28class VKScheduler;
16class VKUpdateDescriptorQueue;
17 29
18class VKComputePipeline final { 30class ComputePipeline {
19public: 31public:
20 explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 32 explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
21 VKDescriptorPool& descriptor_pool_, 33 VKUpdateDescriptorQueue& update_descriptor_queue,
22 VKUpdateDescriptorQueue& update_descriptor_queue_, 34 Common::ThreadWorker* thread_worker,
23 const SPIRVShader& shader_); 35 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
24 ~VKComputePipeline(); 36 vk::ShaderModule spv_module);
25
26 VkDescriptorSet CommitDescriptorSet();
27 37
28 VkPipeline GetHandle() const { 38 ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
29 return *pipeline; 39 ComputePipeline(ComputePipeline&&) noexcept = delete;
30 }
31 40
32 VkPipelineLayout GetLayout() const { 41 ComputePipeline& operator=(const ComputePipeline&) = delete;
33 return *layout; 42 ComputePipeline(const ComputePipeline&) = delete;
34 }
35 43
36 const ShaderEntries& GetEntries() const { 44 void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
37 return entries; 45 VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
38 }
39 46
40private: 47private:
41 vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
42
43 vk::PipelineLayout CreatePipelineLayout() const;
44
45 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
46
47 vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
48
49 vk::Pipeline CreatePipeline() const;
50
51 const Device& device; 48 const Device& device;
52 VKScheduler& scheduler; 49 VKUpdateDescriptorQueue& update_descriptor_queue;
53 ShaderEntries entries; 50 Shader::Info info;
54 51
52 VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
53
54 vk::ShaderModule spv_module;
55 vk::DescriptorSetLayout descriptor_set_layout; 55 vk::DescriptorSetLayout descriptor_set_layout;
56 DescriptorAllocator descriptor_allocator; 56 DescriptorAllocator descriptor_allocator;
57 VKUpdateDescriptorQueue& update_descriptor_queue; 57 vk::PipelineLayout pipeline_layout;
58 vk::PipelineLayout layout; 58 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
59 vk::DescriptorUpdateTemplateKHR descriptor_template;
60 vk::ShaderModule shader_module;
61 vk::Pipeline pipeline; 59 vk::Pipeline pipeline;
60
61 std::condition_variable build_condvar;
62 std::mutex build_mutex;
63 std::atomic_bool is_built{false};
62}; 64};
63 65
64} // namespace Vulkan 66} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb5910..8e77e4796 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex>
6#include <span>
5#include <vector> 7#include <vector>
6 8
7#include "common/common_types.h" 9#include "common/common_types.h"
@@ -13,79 +15,149 @@
13 15
14namespace Vulkan { 16namespace Vulkan {
15 17
16// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 18// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
17constexpr std::size_t SETS_GROW_RATE = 0x20; 19constexpr size_t SETS_GROW_RATE = 16;
20constexpr s32 SCORE_THRESHOLD = 3;
21constexpr u32 SETS_PER_POOL = 64;
18 22
19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, 23struct DescriptorBank {
20 VkDescriptorSetLayout layout_) 24 DescriptorBankInfo info;
21 : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), 25 std::vector<vk::DescriptorPool> pools;
22 descriptor_pool{descriptor_pool_}, layout{layout_} {} 26};
23 27
24DescriptorAllocator::~DescriptorAllocator() = default; 28bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
29 return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
30 texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
31 textures >= subset.textures && images >= subset.images;
32}
25 33
26VkDescriptorSet DescriptorAllocator::Commit() { 34template <typename Descriptors>
27 const std::size_t index = CommitResource(); 35static u32 Accumulate(const Descriptors& descriptors) {
28 return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; 36 u32 count = 0;
37 for (const auto& descriptor : descriptors) {
38 count += descriptor.count;
39 }
40 return count;
29} 41}
30 42
31void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { 43static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
32 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); 44 DescriptorBankInfo bank;
45 for (const Shader::Info& info : infos) {
46 bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
47 bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
48 bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
49 bank.image_buffers += Accumulate(info.image_buffer_descriptors);
50 bank.textures += Accumulate(info.texture_descriptors);
51 bank.images += Accumulate(info.image_descriptors);
52 }
53 bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
54 bank.image_buffers + bank.textures + bank.images;
55 return bank;
33} 56}
34 57
35VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) 58static void AllocatePool(const Device& device, DescriptorBank& bank) {
36 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ 59 std::array<VkDescriptorPoolSize, 6> pool_sizes;
37 AllocateNewPool()} {} 60 size_t pool_cursor{};
38 61 const auto add = [&](VkDescriptorType type, u32 count) {
39VKDescriptorPool::~VKDescriptorPool() = default; 62 if (count > 0) {
40 63 pool_sizes[pool_cursor++] = {
41vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { 64 .type = type,
42 static constexpr u32 num_sets = 0x20000; 65 .descriptorCount = count * SETS_PER_POOL,
43 static constexpr VkDescriptorPoolSize pool_sizes[] = { 66 };
44 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, 67 }
45 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
46 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
47 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
48 {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
49 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
50 }; 68 };
51 69 const auto& info{bank.info};
52 const VkDescriptorPoolCreateInfo ci{ 70 add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
71 add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
72 add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
73 add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
74 add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
75 add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
76 bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 77 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
54 .pNext = nullptr, 78 .pNext = nullptr,
55 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 79 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
56 .maxSets = num_sets, 80 .maxSets = SETS_PER_POOL,
57 .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), 81 .poolSizeCount = static_cast<u32>(pool_cursor),
58 .pPoolSizes = std::data(pool_sizes), 82 .pPoolSizes = std::data(pool_sizes),
59 }; 83 }));
60 return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); 84}
85
86DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
87 DescriptorBank& bank_, VkDescriptorSetLayout layout_)
88 : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
89 layout{layout_} {}
90
91VkDescriptorSet DescriptorAllocator::Commit() {
92 const size_t index = CommitResource();
93 return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
61} 94}
62 95
63vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, 96void DescriptorAllocator::Allocate(size_t begin, size_t end) {
64 std::size_t count) { 97 sets.push_back(AllocateDescriptors(end - begin));
65 const std::vector layout_copies(count, layout); 98}
66 VkDescriptorSetAllocateInfo ai{ 99
100vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
101 const std::vector<VkDescriptorSetLayout> layouts(count, layout);
102 VkDescriptorSetAllocateInfo allocate_info{
67 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 103 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
68 .pNext = nullptr, 104 .pNext = nullptr,
69 .descriptorPool = **active_pool, 105 .descriptorPool = *bank->pools.back(),
70 .descriptorSetCount = static_cast<u32>(count), 106 .descriptorSetCount = static_cast<u32>(count),
71 .pSetLayouts = layout_copies.data(), 107 .pSetLayouts = layouts.data(),
72 }; 108 };
73 109 vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
74 vk::DescriptorSets sets = active_pool->Allocate(ai); 110 if (!new_sets.IsOutOfPoolMemory()) {
75 if (!sets.IsOutOfPoolMemory()) { 111 return new_sets;
76 return sets;
77 } 112 }
78
79 // Our current pool is out of memory. Allocate a new one and retry 113 // Our current pool is out of memory. Allocate a new one and retry
80 active_pool = AllocateNewPool(); 114 AllocatePool(*device, *bank);
81 ai.descriptorPool = **active_pool; 115 allocate_info.descriptorPool = *bank->pools.back();
82 sets = active_pool->Allocate(ai); 116 new_sets = bank->pools.back().Allocate(allocate_info);
83 if (!sets.IsOutOfPoolMemory()) { 117 if (!new_sets.IsOutOfPoolMemory()) {
84 return sets; 118 return new_sets;
85 } 119 }
86
87 // After allocating a new pool, we are out of memory again. We can't handle this from here. 120 // After allocating a new pool, we are out of memory again. We can't handle this from here.
88 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); 121 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
89} 122}
90 123
124DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
125 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
126
127DescriptorPool::~DescriptorPool() = default;
128
129DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
130 std::span<const Shader::Info> infos) {
131 return Allocator(layout, MakeBankInfo(infos));
132}
133
134DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
135 const Shader::Info& info) {
136 return Allocator(layout, MakeBankInfo(std::array{info}));
137}
138
139DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
140 const DescriptorBankInfo& info) {
141 return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
142}
143
144DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
145 std::shared_lock read_lock{banks_mutex};
146 const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
147 return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
148 });
149 if (it != bank_infos.end()) {
150 return *banks[std::distance(bank_infos.begin(), it)].get();
151 }
152 read_lock.unlock();
153
154 std::unique_lock write_lock{banks_mutex};
155 bank_infos.push_back(reqs);
156
157 auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
158 bank.info = reqs;
159 AllocatePool(device, bank);
160 return bank;
161}
162
91} // namespace Vulkan 163} // namespace Vulkan
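DescriptorAllocator::AllocateDescriptors() above tries the bank's most recent pool, grows the bank with AllocatePool() when the allocation reports out-of-pool memory, retries once, and only then throws. The same control flow reduced to plain C++, with ToyPool and its integer quota standing in for vk::DescriptorPool and VK_ERROR_OUT_OF_POOL_MEMORY:

    // Grow-on-exhaustion with a single retry, then give up.
    #include <optional>
    #include <stdexcept>
    #include <vector>

    struct ToyPool {
        int capacity;
        int used = 0;
        std::optional<int> Allocate(int count) { // nullopt plays IsOutOfPoolMemory()
            if (used + count > capacity) {
                return std::nullopt;
            }
            used += count;
            return used;
        }
    };

    int AllocateWithGrow(std::vector<ToyPool>& pools, int count) {
        if (const auto sets = pools.back().Allocate(count)) {
            return *sets;
        }
        pools.push_back(ToyPool{.capacity = 64}); // AllocatePool(): fresh pool for the bank
        if (const auto sets = pools.back().Allocate(count)) {
            return *sets;
        }
        throw std::runtime_error{"out of pool memory"}; // mirrors throwing vk::Exception
    }

    int main() {
        std::vector<ToyPool> pools{ToyPool{.capacity = 4}};
        for (int i = 0; i < 3; ++i) {
            AllocateWithGrow(pools, 3); // second call overflows the first pool and grows
        }
    }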
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7be..59466aac5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -4,57 +4,85 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <shared_mutex>
8#include <span>
7#include <vector> 9#include <vector>
8 10
11#include "shader_recompiler/shader_info.h"
9#include "video_core/renderer_vulkan/vk_resource_pool.h" 12#include "video_core/renderer_vulkan/vk_resource_pool.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
11 14
12namespace Vulkan { 15namespace Vulkan {
13 16
14class Device; 17class Device;
15class VKDescriptorPool;
16class VKScheduler; 18class VKScheduler;
17 19
20struct DescriptorBank;
21
22struct DescriptorBankInfo {
23 [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
24
25 u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
26 u32 storage_buffers{}; ///< Number of storage buffer descriptors
27 u32 texture_buffers{}; ///< Number of texture buffer descriptors
28 u32 image_buffers{}; ///< Number of image buffer descriptors
29 u32 textures{}; ///< Number of texture descriptors
30 u32 images{}; ///< Number of image descriptors
31 s32 score{}; ///< Number of descriptors in total
32};
33
18class DescriptorAllocator final : public ResourcePool { 34class DescriptorAllocator final : public ResourcePool {
35 friend class DescriptorPool;
36
19public: 37public:
20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); 38 explicit DescriptorAllocator() = default;
21 ~DescriptorAllocator() override; 39 ~DescriptorAllocator() override = default;
40
41 DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
42 DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
22 43
23 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; 44 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
24 DescriptorAllocator(const DescriptorAllocator&) = delete; 45 DescriptorAllocator(const DescriptorAllocator&) = delete;
25 46
26 VkDescriptorSet Commit(); 47 VkDescriptorSet Commit();
27 48
28protected:
29 void Allocate(std::size_t begin, std::size_t end) override;
30
31private: 49private:
32 VKDescriptorPool& descriptor_pool; 50 explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
33 const VkDescriptorSetLayout layout; 51 DescriptorBank& bank_, VkDescriptorSetLayout layout_);
34 52
35 std::vector<vk::DescriptorSets> descriptors_allocations; 53 void Allocate(size_t begin, size_t end) override;
36}; 54
55 vk::DescriptorSets AllocateDescriptors(size_t count);
56
57 const Device* device{};
58 DescriptorBank* bank{};
59 VkDescriptorSetLayout layout{};
37 60
38class VKDescriptorPool final { 61 std::vector<vk::DescriptorSets> sets;
39 friend DescriptorAllocator; 62};
40 63
64class DescriptorPool {
41public: 65public:
42 explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); 66 explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
43 ~VKDescriptorPool(); 67 ~DescriptorPool();
44 68
45 VKDescriptorPool(const VKDescriptorPool&) = delete; 69 DescriptorPool& operator=(const DescriptorPool&) = delete;
46 VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; 70 DescriptorPool(const DescriptorPool&) = delete;
47 71
48private: 72 DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
49 vk::DescriptorPool* AllocateNewPool(); 73 std::span<const Shader::Info> infos);
74 DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
75 DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
50 76
51 vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); 77private:
78 DescriptorBank& Bank(const DescriptorBankInfo& reqs);
52 79
53 const Device& device; 80 const Device& device;
54 MasterSemaphore& master_semaphore; 81 MasterSemaphore& master_semaphore;
55 82
56 std::vector<vk::DescriptorPool> pools; 83 std::shared_mutex banks_mutex;
57 vk::DescriptorPool* active_pool; 84 std::vector<DescriptorBankInfo> bank_infos;
85 std::vector<std::unique_ptr<DescriptorBank>> banks;
58}; 86};
59 87
60} // namespace Vulkan \ No newline at end of file 88} // namespace Vulkan \ No newline at end of file
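The pool now hands out one DescriptorAllocator per descriptor-set "shape": a request reuses an existing bank when the total descriptor count (score) is within SCORE_THRESHOLD and the bank is a superset of the request, otherwise a new bank is opened. A reduced sketch of that matching with the field list cut down to two counters (BankInfo and FindOrAddBank are illustrative, not the yuzu API):

    // Bank matching on score distance plus a per-field superset test.
    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    struct BankInfo {
        int uniform_buffers = 0;
        int textures = 0;
        int score = 0;

        bool IsSuperset(const BankInfo& subset) const {
            return uniform_buffers >= subset.uniform_buffers && textures >= subset.textures;
        }
    };

    constexpr int SCORE_THRESHOLD = 3;

    int FindOrAddBank(std::vector<BankInfo>& banks, const BankInfo& reqs) {
        for (std::size_t i = 0; i < banks.size(); ++i) {
            if (std::abs(banks[i].score - reqs.score) < SCORE_THRESHOLD && banks[i].IsSuperset(reqs)) {
                return static_cast<int>(i); // reuse a compatible bank
            }
        }
        banks.push_back(reqs); // open a new bank for this shape
        return static_cast<int>(banks.size()) - 1;
    }

    int main() {
        std::vector<BankInfo> banks;
        const int a = FindOrAddBank(banks, {.uniform_buffers = 2, .textures = 1, .score = 3});
        const int b = FindOrAddBank(banks, {.uniform_buffers = 1, .textures = 1, .score = 2});
        std::printf("a=%d b=%d reuse=%s\n", a, b, a == b ? "yes" : "no");
    }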
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index fc6dd83eb..18482e1d0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -1,29 +1,58 @@
1// Copyright 2019 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <span>
7#include <cstring>
8#include <vector>
9 7
10#include "common/common_types.h" 8#include <boost/container/small_vector.hpp>
11#include "common/microprofile.h" 9#include <boost/container/static_vector.hpp>
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 10
11#include "common/bit_field.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h" 12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/pipeline_helper.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
16#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 16#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h" 19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/shader_notify.h"
19#include "video_core/vulkan_common/vulkan_device.h" 21#include "video_core/vulkan_common/vulkan_device.h"
20#include "video_core/vulkan_common/vulkan_wrapper.h"
21
22namespace Vulkan {
23 22
24MICROPROFILE_DECLARE(Vulkan_PipelineCache); 23#if defined(_MSC_VER) && defined(NDEBUG)
24#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
25#else
26#define LAMBDA_FORCEINLINE
27#endif
25 28
29namespace Vulkan {
26namespace { 30namespace {
31using boost::container::small_vector;
32using boost::container::static_vector;
33using Shader::ImageBufferDescriptor;
34using Tegra::Texture::TexturePair;
35using VideoCore::Surface::PixelFormat;
36using VideoCore::Surface::PixelFormatFromDepthFormat;
37using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
38
39constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
40constexpr size_t MAX_IMAGE_ELEMENTS = 64;
41
42DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
43 DescriptorLayoutBuilder builder{device};
44 for (size_t index = 0; index < infos.size(); ++index) {
45 static constexpr std::array stages{
46 VK_SHADER_STAGE_VERTEX_BIT,
47 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
48 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
49 VK_SHADER_STAGE_GEOMETRY_BIT,
50 VK_SHADER_STAGE_FRAGMENT_BIT,
51 };
52 builder.Add(infos[index], stages.at(index));
53 }
54 return builder;
55}
27 56
28template <class StencilFace> 57template <class StencilFace>
29VkStencilOpState GetStencilFaceState(const StencilFace& face) { 58VkStencilOpState GetStencilFaceState(const StencilFace& face) {
@@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) {
39} 68}
40 69
41bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { 70bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
42 static constexpr std::array unsupported_topologies = { 71 static constexpr std::array unsupported_topologies{
43 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 72 VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
44 VK_PRIMITIVE_TOPOLOGY_LINE_LIST, 73 VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
45 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 74 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
46 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, 75 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
47 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, 76 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
48 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; 77 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
49 return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), 78 // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
50 topology) == std::end(unsupported_topologies); 79 };
80 return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
81}
82
83bool IsLine(VkPrimitiveTopology topology) {
84 static constexpr std::array line_topologies{
85 VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
86 // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT,
87 };
88 return std::ranges::find(line_topologies, topology) != line_topologies.end();
51} 89}
52 90
53VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { 91VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
@@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
59 BitField<12, 3, Maxwell::ViewportSwizzle> w; 97 BitField<12, 3, Maxwell::ViewportSwizzle> w;
60 }; 98 };
61 const Swizzle unpacked{swizzle}; 99 const Swizzle unpacked{swizzle};
62 100 return VkViewportSwizzleNV{
63 return {
64 .x = MaxwellToVK::ViewportSwizzle(unpacked.x), 101 .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
65 .y = MaxwellToVK::ViewportSwizzle(unpacked.y), 102 .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
66 .z = MaxwellToVK::ViewportSwizzle(unpacked.z), 103 .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
@@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
68 }; 105 };
69} 106}
70 107
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { 108PixelFormat DecodeFormat(u8 encoded_format) {
72 switch (msaa_mode) { 109 const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
73 case Tegra::Texture::MsaaMode::Msaa1x1: 110 if (format == Tegra::RenderTargetFormat::NONE) {
74 return VK_SAMPLE_COUNT_1_BIT; 111 return PixelFormat::Invalid;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 } 112 }
113 return PixelFormatFromRenderTargetFormat(format);
93} 114}
94 115
95} // Anonymous namespace 116RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
117 RenderPassKey key;
118 std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
119 if (state.depth_enabled != 0) {
120 const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
121 key.depth_format = PixelFormatFromDepthFormat(depth_format);
122 } else {
123 key.depth_format = PixelFormat::Invalid;
124 }
125 key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
126 return key;
127}
96 128
97VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, 129size_t NumAttachments(const FixedPipelineState& state) {
98 VKDescriptorPool& descriptor_pool_, 130 size_t num{};
99 VKUpdateDescriptorQueue& update_descriptor_queue_, 131 for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
100 const GraphicsPipelineCacheKey& key, 132 const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
101 vk::Span<VkDescriptorSetLayoutBinding> bindings, 133 if (format != Tegra::RenderTargetFormat::NONE) {
102 const SPIRVProgram& program, u32 num_color_buffers) 134 num = index + 1;
103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, 135 }
104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, 136 }
105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 137 return num;
106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
108 modules(CreateShaderModules(program)),
109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
110
111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
112
113VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
114 if (!descriptor_template) {
115 return {};
116 }
117 const VkDescriptorSet set = descriptor_allocator.Commit();
118 update_descriptor_queue.Send(*descriptor_template, set);
119 return set;
120} 138}
121 139
122vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( 140template <typename Spec>
123 vk::Span<VkDescriptorSetLayoutBinding> bindings) const { 141bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
124 const VkDescriptorSetLayoutCreateInfo ci{ 142 const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
125 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 143 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
126 .pNext = nullptr, 144 if (!Spec::enabled_stages[stage] && modules[stage]) {
127 .flags = 0, 145 return false;
128 .bindingCount = bindings.size(), 146 }
129 .pBindings = bindings.data(), 147 const auto& info{stage_infos[stage]};
130 }; 148 if constexpr (!Spec::has_storage_buffers) {
131 return device.GetLogical().CreateDescriptorSetLayout(ci); 149 if (!info.storage_buffers_descriptors.empty()) {
150 return false;
151 }
152 }
153 if constexpr (!Spec::has_texture_buffers) {
154 if (!info.texture_buffer_descriptors.empty()) {
155 return false;
156 }
157 }
158 if constexpr (!Spec::has_image_buffers) {
159 if (!info.image_buffer_descriptors.empty()) {
160 return false;
161 }
162 }
163 if constexpr (!Spec::has_images) {
164 if (!info.image_descriptors.empty()) {
165 return false;
166 }
167 }
168 }
169 return true;
132} 170}
133 171
134vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { 172using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
135 const VkPipelineLayoutCreateInfo ci{ 173
136 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 174template <typename Spec, typename... Specs>
137 .pNext = nullptr, 175ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
138 .flags = 0, 176 const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
139 .setLayoutCount = 1, 177 if constexpr (sizeof...(Specs) > 0) {
140 .pSetLayouts = descriptor_set_layout.address(), 178 if (!Passes<Spec>(modules, stage_infos)) {
141 .pushConstantRangeCount = 0, 179 return FindSpec<Specs...>(modules, stage_infos);
142 .pPushConstantRanges = nullptr, 180 }
143 }; 181 }
144 return device.GetLogical().CreatePipelineLayout(ci); 182 return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
145} 183}
146 184
147vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( 185struct SimpleVertexFragmentSpec {
148 const SPIRVProgram& program) const { 186 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
149 std::vector<VkDescriptorUpdateTemplateEntry> template_entries; 187 static constexpr bool has_storage_buffers = false;
150 u32 binding = 0; 188 static constexpr bool has_texture_buffers = false;
151 u32 offset = 0; 189 static constexpr bool has_image_buffers = false;
152 for (const auto& stage : program) { 190 static constexpr bool has_images = false;
153 if (stage) { 191};
154 FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); 192
193struct SimpleVertexSpec {
194 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
195 static constexpr bool has_storage_buffers = false;
196 static constexpr bool has_texture_buffers = false;
197 static constexpr bool has_image_buffers = false;
198 static constexpr bool has_images = false;
199};
200
201struct DefaultSpec {
202 static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
203 static constexpr bool has_storage_buffers = true;
204 static constexpr bool has_texture_buffers = true;
205 static constexpr bool has_image_buffers = true;
206 static constexpr bool has_images = true;
207};
208
209ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
210 const std::array<Shader::Info, NUM_STAGES>& infos) {
211 return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
212}
213} // Anonymous namespace
214
215GraphicsPipeline::GraphicsPipeline(
216 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
217 VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
218 VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
219 VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
220 RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_,
221 std::array<vk::ShaderModule, NUM_STAGES> stages,
222 const std::array<const Shader::Info*, NUM_STAGES>& infos)
223 : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
224 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
225 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
226 if (shader_notify) {
227 shader_notify->MarkShaderBuilding();
228 }
229 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
230 const Shader::Info* const info{infos[stage]};
231 if (!info) {
232 continue;
155 } 233 }
234 stage_infos[stage] = *info;
235 enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
236 std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
156 } 237 }
157 if (template_entries.empty()) { 238 auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
158 // If the shader doesn't use descriptor sets, skip template creation. 239 DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
159 return {}; 240 uses_push_descriptor = builder.CanUsePushDescriptor();
241 descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
242 if (!uses_push_descriptor) {
243 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
244 }
245 const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
246 pipeline_layout = builder.CreatePipelineLayout(set_layout);
247 descriptor_update_template =
248 builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor);
249
250 const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
251 Validate();
252 MakePipeline(render_pass);
253
254 std::lock_guard lock{build_mutex};
255 is_built = true;
256 build_condvar.notify_one();
257 if (shader_notify) {
258 shader_notify->MarkShaderComplete();
259 }
260 }};
261 if (worker_thread) {
262 worker_thread->QueueWork(std::move(func));
263 } else {
264 func();
160 } 265 }
266 configure_func = ConfigureFunc(spv_modules, stage_infos);
267}
161 268
162 const VkDescriptorUpdateTemplateCreateInfoKHR ci{ 269void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
163 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 270 transition_keys.push_back(transition->key);
164 .pNext = nullptr, 271 transitions.push_back(transition);
165 .flags = 0,
166 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
167 .pDescriptorUpdateEntries = template_entries.data(),
168 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
169 .descriptorSetLayout = *descriptor_set_layout,
170 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
171 .pipelineLayout = *layout,
172 .set = DESCRIPTOR_SET,
173 };
174 return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
175} 272}
176 273
177std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( 274template <typename Spec>
178 const SPIRVProgram& program) const { 275void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
179 VkShaderModuleCreateInfo ci{ 276 std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
180 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 277 std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
181 .pNext = nullptr, 278 std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
182 .flags = 0, 279 size_t sampler_index{};
183 .codeSize = 0, 280 size_t image_index{};
184 .pCode = nullptr, 281
185 }; 282 texture_cache.SynchronizeGraphicsDescriptors();
283
284 buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
285
286 const auto& regs{maxwell3d.regs};
287 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
288 const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
289 const Shader::Info& info{stage_infos[stage]};
290 buffer_cache.UnbindGraphicsStorageBuffers(stage);
291 if constexpr (Spec::has_storage_buffers) {
292 size_t ssbo_index{};
293 for (const auto& desc : info.storage_buffers_descriptors) {
294 ASSERT(desc.count == 1);
295 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
296 desc.cbuf_offset, desc.is_written);
297 ++ssbo_index;
298 }
299 }
300 const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
301 const auto read_handle{[&](const auto& desc, u32 index) {
302 ASSERT(cbufs[desc.cbuf_index].enabled);
303 const u32 index_offset{index << desc.size_shift};
304 const u32 offset{desc.cbuf_offset + index_offset};
305 const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
306 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
307 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
308 if (desc.has_secondary) {
309 ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
310 const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
311 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
312 second_offset};
313 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
314 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
315 const u32 raw{lhs_raw | rhs_raw};
316 return TexturePair(raw, via_header_index);
317 }
318 }
319 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
320 }};
321 const auto add_image{[&](const auto& desc) {
322 for (u32 index = 0; index < desc.count; ++index) {
323 const auto handle{read_handle(desc, index)};
324 image_view_indices[image_index++] = handle.first;
325 }
326 }};
327 if constexpr (Spec::has_texture_buffers) {
328 for (const auto& desc : info.texture_buffer_descriptors) {
329 add_image(desc);
330 }
331 }
332 if constexpr (Spec::has_image_buffers) {
333 for (const auto& desc : info.image_buffer_descriptors) {
334 add_image(desc);
335 }
336 }
337 for (const auto& desc : info.texture_descriptors) {
338 for (u32 index = 0; index < desc.count; ++index) {
339 const auto handle{read_handle(desc, index)};
340 image_view_indices[image_index++] = handle.first;
186 341
187 std::vector<vk::ShaderModule> shader_modules; 342 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
188 shader_modules.reserve(Maxwell::MaxShaderStage); 343 samplers[sampler_index++] = sampler->Handle();
189 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { 344 }
190 const auto& stage = program[i]; 345 }
191 if (!stage) { 346 if constexpr (Spec::has_images) {
192 continue; 347 for (const auto& desc : info.image_descriptors) {
348 add_image(desc);
349 }
193 } 350 }
351 }};
352 if constexpr (Spec::enabled_stages[0]) {
353 config_stage(0);
354 }
355 if constexpr (Spec::enabled_stages[1]) {
356 config_stage(1);
357 }
358 if constexpr (Spec::enabled_stages[2]) {
359 config_stage(2);
360 }
361 if constexpr (Spec::enabled_stages[3]) {
362 config_stage(3);
363 }
364 if constexpr (Spec::enabled_stages[4]) {
365 config_stage(4);
366 }
367 const std::span indices_span(image_view_indices.data(), image_index);
368 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
369
370 ImageId* texture_buffer_index{image_view_ids.data()};
371 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
372 size_t index{};
373 const auto add_buffer{[&](const auto& desc) {
374 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
375 for (u32 i = 0; i < desc.count; ++i) {
376 bool is_written{false};
377 if constexpr (is_image) {
378 is_written = desc.is_written;
379 }
380 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
381 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
382 image_view.BufferSize(), image_view.format,
383 is_written, is_image);
384 ++index;
385 ++texture_buffer_index;
386 }
387 }};
388 buffer_cache.UnbindGraphicsTextureBuffers(stage);
194 389
195 device.SaveShader(stage->code); 390 const Shader::Info& info{stage_infos[stage]};
391 if constexpr (Spec::has_texture_buffers) {
392 for (const auto& desc : info.texture_buffer_descriptors) {
393 add_buffer(desc);
394 }
395 }
396 if constexpr (Spec::has_image_buffers) {
397 for (const auto& desc : info.image_buffer_descriptors) {
398 add_buffer(desc);
399 }
400 }
401 for (const auto& desc : info.texture_descriptors) {
402 texture_buffer_index += desc.count;
403 }
404 if constexpr (Spec::has_images) {
405 for (const auto& desc : info.image_descriptors) {
406 texture_buffer_index += desc.count;
407 }
408 }
409 }};
410 if constexpr (Spec::enabled_stages[0]) {
411 bind_stage_info(0);
412 }
413 if constexpr (Spec::enabled_stages[1]) {
414 bind_stage_info(1);
415 }
416 if constexpr (Spec::enabled_stages[2]) {
417 bind_stage_info(2);
418 }
419 if constexpr (Spec::enabled_stages[3]) {
420 bind_stage_info(3);
421 }
422 if constexpr (Spec::enabled_stages[4]) {
423 bind_stage_info(4);
424 }
425
426 buffer_cache.UpdateGraphicsBuffers(is_indexed);
427 buffer_cache.BindHostGeometryBuffers(is_indexed);
196 428
197 ci.codeSize = stage->code.size() * sizeof(u32); 429 update_descriptor_queue.Acquire();
198 ci.pCode = stage->code.data(); 430
199 shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); 431 const VkSampler* samplers_it{samplers.data()};
432 const ImageId* views_it{image_view_ids.data()};
433 const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
434 buffer_cache.BindHostStageBuffers(stage);
435 PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
436 update_descriptor_queue);
437 }};
438 if constexpr (Spec::enabled_stages[0]) {
439 prepare_stage(0);
440 }
441 if constexpr (Spec::enabled_stages[1]) {
442 prepare_stage(1);
200 } 443 }
201 return shader_modules; 444 if constexpr (Spec::enabled_stages[2]) {
445 prepare_stage(2);
446 }
447 if constexpr (Spec::enabled_stages[3]) {
448 prepare_stage(3);
449 }
450 if constexpr (Spec::enabled_stages[4]) {
451 prepare_stage(4);
452 }
453 ConfigureDraw();
202} 454}
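// Illustrative sketch (names are hypothetical, not part of this change): ConfigureImpl<Spec>
// above is specialized on a struct whose members are all compile-time constants, so the
// `if constexpr` checks prune disabled stages and unused descriptor kinds from the generated code.
#include <array>

struct ExampleSpec {
    static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
    static constexpr bool has_storage_buffers = false;
    static constexpr bool has_texture_buffers = false;
    static constexpr bool has_image_buffers = false;
    static constexpr bool has_images = false;
};
// MakeConfigureSpecFunc<ExampleSpec>() would then yield a captureless lambda that converts to the
// configure_func pointer and only contains the vertex- and fragment-stage paths.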
203 455
204vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, 456void GraphicsPipeline::ConfigureDraw() {
205 VkRenderPass renderpass, 457 texture_cache.UpdateRenderTargets(false);
206 u32 num_color_buffers) const { 458 scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
207 const auto& state = cache_key.fixed_state; 459
208 const auto& viewport_swizzles = state.viewport_swizzles; 460 if (!is_built.load(std::memory_order::relaxed)) {
209 461 // Wait for the pipeline to be built
210 FixedPipelineState::DynamicState dynamic; 462 scheduler.Record([this](vk::CommandBuffer) {
211 if (device.IsExtExtendedDynamicStateSupported()) { 463 std::unique_lock lock{build_mutex};
212 // Insert dummy values, as long as they are valid they don't matter as extended dynamic 464 build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
213 // state is ignored
214 dynamic.raw1 = 0;
215 dynamic.raw2 = 0;
216 dynamic.vertex_strides.fill(0);
217 } else {
218 dynamic = state.dynamic_state;
219 }
220
221 std::vector<VkVertexInputBindingDescription> vertex_bindings;
222 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
223 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
224 const bool instanced = state.binding_divisors[index] != 0;
225 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
226 vertex_bindings.push_back({
227 .binding = static_cast<u32>(index),
228 .stride = dynamic.vertex_strides[index],
229 .inputRate = rate,
230 }); 465 });
231 if (instanced) {
232 vertex_binding_divisors.push_back({
233 .binding = static_cast<u32>(index),
234 .divisor = state.binding_divisors[index],
235 });
236 }
237 } 466 }
467 const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
468 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
469 scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
470 if (bind_pipeline) {
471 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
472 }
473 if (!descriptor_set_layout) {
474 return;
475 }
476 if (uses_push_descriptor) {
477 cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout,
478 0, descriptor_data);
479 } else {
480 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
481 const vk::Device& dev{device.GetLogical()};
482 dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
483 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
484 descriptor_set, nullptr);
485 }
486 });
487}
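// The build wait in ConfigureDraw is the usual atomic-flag plus condition-variable handshake with
// the worker thread that runs MakePipeline; a self-contained sketch of the same pattern, using
// only the standard library (nothing here is taken verbatim from this file):
#include <atomic>
#include <condition_variable>
#include <mutex>

struct AsyncBuildExample {
    std::mutex build_mutex;
    std::condition_variable build_condvar;
    std::atomic_bool is_built{false};

    void OnBuildFinished() { // called once by the builder thread
        std::scoped_lock lock{build_mutex};
        is_built = true;
        build_condvar.notify_one();
    }

    void WaitUntilBuilt() { // called by the thread recording draw commands
        if (is_built.load(std::memory_order::relaxed)) {
            return; // fast path: no locking once the pipeline is ready
        }
        std::unique_lock lock{build_mutex};
        build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
    }
};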
238 488
239 std::vector<VkVertexInputAttributeDescription> vertex_attributes; 489void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
240 const auto& input_attributes = program[0]->entries.attributes; 490 FixedPipelineState::DynamicState dynamic{};
241 for (std::size_t index = 0; index < state.attributes.size(); ++index) { 491 if (!key.state.extended_dynamic_state) {
242 const auto& attribute = state.attributes[index]; 492 dynamic = key.state.dynamic_state;
243 if (!attribute.enabled) { 493 }
244 continue; 494 static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
495 static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
496 static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
497 if (key.state.dynamic_vertex_input) {
498 for (size_t index = 0; index < key.state.attributes.size(); ++index) {
499 const u32 type = key.state.DynamicAttributeType(index);
500 if (!stage_infos[0].loads.Generic(index) || type == 0) {
501 continue;
502 }
503 vertex_attributes.push_back({
504 .location = static_cast<u32>(index),
505 .binding = 0,
506 .format = type == 1 ? VK_FORMAT_R32_SFLOAT
507 : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT,
508 .offset = 0,
509 });
245 } 510 }
246 if (!input_attributes.contains(static_cast<u32>(index))) { 511 if (!vertex_attributes.empty()) {
247 // Skip attributes not used by the vertex shaders. 512 vertex_bindings.push_back({
248 continue; 513 .binding = 0,
514 .stride = 4,
515 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
516 });
517 }
518 } else {
519 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
520 const bool instanced = key.state.binding_divisors[index] != 0;
521 const auto rate =
522 instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
523 vertex_bindings.push_back({
524 .binding = static_cast<u32>(index),
525 .stride = dynamic.vertex_strides[index],
526 .inputRate = rate,
527 });
528 if (instanced) {
529 vertex_binding_divisors.push_back({
530 .binding = static_cast<u32>(index),
531 .divisor = key.state.binding_divisors[index],
532 });
533 }
534 }
535 for (size_t index = 0; index < key.state.attributes.size(); ++index) {
536 const auto& attribute = key.state.attributes[index];
537 if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) {
538 continue;
539 }
540 vertex_attributes.push_back({
541 .location = static_cast<u32>(index),
542 .binding = attribute.buffer,
543 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
544 .offset = attribute.offset,
545 });
249 } 546 }
250 vertex_attributes.push_back({
251 .location = static_cast<u32>(index),
252 .binding = attribute.buffer,
253 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
254 .offset = attribute.offset,
255 });
256 } 547 }
257
258 VkPipelineVertexInputStateCreateInfo vertex_input_ci{ 548 VkPipelineVertexInputStateCreateInfo vertex_input_ci{
259 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 549 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
260 .pNext = nullptr, 550 .pNext = nullptr,
@@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
264 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), 554 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
265 .pVertexAttributeDescriptions = vertex_attributes.data(), 555 .pVertexAttributeDescriptions = vertex_attributes.data(),
266 }; 556 };
267
268 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ 557 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
269 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, 558 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
270 .pNext = nullptr, 559 .pNext = nullptr,
@@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
274 if (!vertex_binding_divisors.empty()) { 563 if (!vertex_binding_divisors.empty()) {
275 vertex_input_ci.pNext = &input_divisor_ci; 564 vertex_input_ci.pNext = &input_divisor_ci;
276 } 565 }
277 566 auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
278 const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); 567 if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
568 if (!spv_modules[1] && !spv_modules[2]) {
569 LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
570 input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
571 }
572 }
279 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ 573 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
280 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 574 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
281 .pNext = nullptr, 575 .pNext = nullptr,
282 .flags = 0, 576 .flags = 0,
283 .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), 577 .topology = input_assembly_topology,
284 .primitiveRestartEnable = state.primitive_restart_enable != 0 && 578 .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
285 SupportsPrimitiveRestart(input_assembly_topology), 579 SupportsPrimitiveRestart(input_assembly_topology),
286 }; 580 };
287
288 const VkPipelineTessellationStateCreateInfo tessellation_ci{ 581 const VkPipelineTessellationStateCreateInfo tessellation_ci{
289 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, 582 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
290 .pNext = nullptr, 583 .pNext = nullptr,
291 .flags = 0, 584 .flags = 0,
292 .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, 585 .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
293 };
294
295 VkPipelineViewportStateCreateInfo viewport_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
297 .pNext = nullptr,
298 .flags = 0,
299 .viewportCount = Maxwell::NumViewports,
300 .pViewports = nullptr,
301 .scissorCount = Maxwell::NumViewports,
302 .pScissors = nullptr,
303 }; 586 };
304 587
305 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; 588 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
306 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); 589 std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
307 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ 590 const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
308 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, 591 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
309 .pNext = nullptr, 592 .pNext = nullptr,
310 .flags = 0, 593 .flags = 0,
311 .viewportCount = Maxwell::NumViewports, 594 .viewportCount = Maxwell::NumViewports,
312 .pViewportSwizzles = swizzles.data(), 595 .pViewportSwizzles = swizzles.data(),
313 }; 596 };
314 if (device.IsNvViewportSwizzleSupported()) { 597 const VkPipelineViewportStateCreateInfo viewport_ci{
315 viewport_ci.pNext = &swizzle_ci; 598 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
316 } 599 .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr,
600 .flags = 0,
601 .viewportCount = Maxwell::NumViewports,
602 .pViewports = nullptr,
603 .scissorCount = Maxwell::NumViewports,
604 .pScissors = nullptr,
605 };
317 606
318 const VkPipelineRasterizationStateCreateInfo rasterization_ci{ 607 VkPipelineRasterizationStateCreateInfo rasterization_ci{
319 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 608 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
320 .pNext = nullptr, 609 .pNext = nullptr,
321 .flags = 0, 610 .flags = 0,
322 .depthClampEnable = 611 .depthClampEnable =
323 static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), 612 static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
324 .rasterizerDiscardEnable = 613 .rasterizerDiscardEnable =
325 static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), 614 static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
326 .polygonMode = VK_POLYGON_MODE_FILL, 615 .polygonMode =
616 MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
327 .cullMode = static_cast<VkCullModeFlags>( 617 .cullMode = static_cast<VkCullModeFlags>(
328 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), 618 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
329 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), 619 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
330 .depthBiasEnable = state.depth_bias_enable, 620 .depthBiasEnable = key.state.depth_bias_enable,
331 .depthBiasConstantFactor = 0.0f, 621 .depthBiasConstantFactor = 0.0f,
332 .depthBiasClamp = 0.0f, 622 .depthBiasClamp = 0.0f,
333 .depthBiasSlopeFactor = 0.0f, 623 .depthBiasSlopeFactor = 0.0f,
334 .lineWidth = 1.0f, 624 .lineWidth = 1.0f,
335 }; 625 };
626 VkPipelineRasterizationLineStateCreateInfoEXT line_state{
627 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
628 .pNext = nullptr,
629 .lineRasterizationMode = key.state.smooth_lines != 0
630 ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
631 : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
632 .stippledLineEnable = VK_FALSE, // TODO
633 .lineStippleFactor = 0,
634 .lineStipplePattern = 0,
635 };
636 VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{
637 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT,
638 .pNext = nullptr,
639 .flags = 0,
640 .conservativeRasterizationMode = key.state.conservative_raster_enable != 0
641 ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
642 : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
643 .extraPrimitiveOverestimationSize = 0.0f,
644 };
645 VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{
646 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
647 .pNext = nullptr,
648 .provokingVertexMode = key.state.provoking_vertex_last != 0
649 ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT
650 : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT,
651 };
652 if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) {
653 line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state);
654 }
655 if (device.IsExtConservativeRasterizationSupported()) {
656 conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster);
657 }
658 if (device.IsExtProvokingVertexSupported()) {
659 provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
660 }
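// The three std::exchange calls above splice optional extension structs onto the front of the
// rasterization state's pNext chain while preserving anything already chained. A minimal, generic
// sketch of the idiom (assumes both structs expose a void-pointer pNext member):
#include <utility>

template <typename Base, typename Extension>
void ChainExtension(Base& base, Extension& extension) {
    // The extension becomes the new head of the chain; the previous head is re-linked behind it.
    extension.pNext = std::exchange(base.pNext, &extension);
}
// Usage equivalent to the code above: ChainExtension(rasterization_ci, line_state);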
336 661
337 const VkPipelineMultisampleStateCreateInfo multisample_ci{ 662 const VkPipelineMultisampleStateCreateInfo multisample_ci{
338 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 663 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
339 .pNext = nullptr, 664 .pNext = nullptr,
340 .flags = 0, 665 .flags = 0,
341 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), 666 .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
342 .sampleShadingEnable = VK_FALSE, 667 .sampleShadingEnable = VK_FALSE,
343 .minSampleShading = 0.0f, 668 .minSampleShading = 0.0f,
344 .pSampleMask = nullptr, 669 .pSampleMask = nullptr,
345 .alphaToCoverageEnable = VK_FALSE, 670 .alphaToCoverageEnable = VK_FALSE,
346 .alphaToOneEnable = VK_FALSE, 671 .alphaToOneEnable = VK_FALSE,
347 }; 672 };
348
349 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ 673 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
350 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 674 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
351 .pNext = nullptr, 675 .pNext = nullptr,
@@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
355 .depthCompareOp = dynamic.depth_test_enable 679 .depthCompareOp = dynamic.depth_test_enable
356 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) 680 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
357 : VK_COMPARE_OP_ALWAYS, 681 : VK_COMPARE_OP_ALWAYS,
358 .depthBoundsTestEnable = dynamic.depth_bounds_enable, 682 .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(),
359 .stencilTestEnable = dynamic.stencil_enable, 683 .stencilTestEnable = dynamic.stencil_enable,
360 .front = GetStencilFaceState(dynamic.front), 684 .front = GetStencilFaceState(dynamic.front),
361 .back = GetStencilFaceState(dynamic.back), 685 .back = GetStencilFaceState(dynamic.back),
362 .minDepthBounds = 0.0f, 686 .minDepthBounds = 0.0f,
363 .maxDepthBounds = 0.0f, 687 .maxDepthBounds = 0.0f,
364 }; 688 };
365 689 if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) {
366 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 690 LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
367 for (std::size_t index = 0; index < num_color_buffers; ++index) { 691 }
368 static constexpr std::array COMPONENT_TABLE{ 692 static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
693 const size_t num_attachments{NumAttachments(key.state)};
694 for (size_t index = 0; index < num_attachments; ++index) {
695 static constexpr std::array mask_table{
369 VK_COLOR_COMPONENT_R_BIT, 696 VK_COLOR_COMPONENT_R_BIT,
370 VK_COLOR_COMPONENT_G_BIT, 697 VK_COLOR_COMPONENT_G_BIT,
371 VK_COLOR_COMPONENT_B_BIT, 698 VK_COLOR_COMPONENT_B_BIT,
372 VK_COLOR_COMPONENT_A_BIT, 699 VK_COLOR_COMPONENT_A_BIT,
373 }; 700 };
374 const auto& blend = state.attachments[index]; 701 const auto& blend{key.state.attachments[index]};
375 702 const std::array mask{blend.Mask()};
376 VkColorComponentFlags color_components = 0; 703 VkColorComponentFlags write_mask{};
377 for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { 704 for (size_t i = 0; i < mask_table.size(); ++i) {
378 if (blend.Mask()[i]) { 705 write_mask |= mask[i] ? mask_table[i] : 0;
379 color_components |= COMPONENT_TABLE[i];
380 }
381 } 706 }
382 707 cb_attachments.push_back({
383 cb_attachments[index] = {
384 .blendEnable = blend.enable != 0, 708 .blendEnable = blend.enable != 0,
385 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), 709 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
386 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), 710 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
@@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
388 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), 712 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
389 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), 713 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
390 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), 714 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
391 .colorWriteMask = color_components, 715 .colorWriteMask = write_mask,
392 }; 716 });
393 } 717 }
394
395 const VkPipelineColorBlendStateCreateInfo color_blend_ci{ 718 const VkPipelineColorBlendStateCreateInfo color_blend_ci{
396 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 719 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
397 .pNext = nullptr, 720 .pNext = nullptr,
398 .flags = 0, 721 .flags = 0,
399 .logicOpEnable = VK_FALSE, 722 .logicOpEnable = VK_FALSE,
400 .logicOp = VK_LOGIC_OP_COPY, 723 .logicOp = VK_LOGIC_OP_COPY,
401 .attachmentCount = num_color_buffers, 724 .attachmentCount = static_cast<u32>(cb_attachments.size()),
402 .pAttachments = cb_attachments.data(), 725 .pAttachments = cb_attachments.data(),
403 .blendConstants = {}, 726 .blendConstants = {},
404 }; 727 };
405 728 static_vector<VkDynamicState, 19> dynamic_states{
406 std::vector dynamic_states{
407 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, 729 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
408 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 730 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
409 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 731 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
410 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 732 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
733 VK_DYNAMIC_STATE_LINE_WIDTH,
411 }; 734 };
412 if (device.IsExtExtendedDynamicStateSupported()) { 735 if (key.state.extended_dynamic_state) {
413 static constexpr std::array extended{ 736 static constexpr std::array extended{
414 VK_DYNAMIC_STATE_CULL_MODE_EXT, 737 VK_DYNAMIC_STATE_CULL_MODE_EXT,
415 VK_DYNAMIC_STATE_FRONT_FACE_EXT, 738 VK_DYNAMIC_STATE_FRONT_FACE_EXT,
@@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
421 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, 744 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
422 VK_DYNAMIC_STATE_STENCIL_OP_EXT, 745 VK_DYNAMIC_STATE_STENCIL_OP_EXT,
423 }; 746 };
747 if (key.state.dynamic_vertex_input) {
748 dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
749 }
424 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); 750 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
425 } 751 }
426
427 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ 752 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
428 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 753 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
429 .pNext = nullptr, 754 .pNext = nullptr,
@@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
431 .dynamicStateCount = static_cast<u32>(dynamic_states.size()), 756 .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
432 .pDynamicStates = dynamic_states.data(), 757 .pDynamicStates = dynamic_states.data(),
433 }; 758 };
434 759 [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
435 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
436 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, 760 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
437 .pNext = nullptr, 761 .pNext = nullptr,
438 .requiredSubgroupSize = GuestWarpSize, 762 .requiredSubgroupSize = GuestWarpSize,
439 }; 763 };
440 764 static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
441 std::vector<VkPipelineShaderStageCreateInfo> shader_stages; 765 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
442 std::size_t module_index = 0; 766 if (!spv_modules[stage]) {
443 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
444 if (!program[stage]) {
445 continue; 767 continue;
446 } 768 }
447 769 [[maybe_unused]] auto& stage_ci =
448 VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); 770 shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
449 stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 771 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
450 stage_ci.pNext = nullptr; 772 .pNext = nullptr,
451 stage_ci.flags = 0; 773 .flags = 0,
452 stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); 774 .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)),
453 stage_ci.module = *modules[module_index++]; 775 .module = *spv_modules[stage],
454 stage_ci.pName = "main"; 776 .pName = "main",
455 stage_ci.pSpecializationInfo = nullptr; 777 .pSpecializationInfo = nullptr,
456 778 });
779 /*
457 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { 780 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
458 stage_ci.pNext = &subgroup_size_ci; 781 stage_ci.pNext = &subgroup_size_ci;
459 } 782 }
783 */
460 } 784 }
461 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ 785 pipeline = device.GetLogical().CreateGraphicsPipeline({
462 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 786 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
463 .pNext = nullptr, 787 .pNext = nullptr,
464 .flags = 0, 788 .flags = 0,
@@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
473 .pDepthStencilState = &depth_stencil_ci, 797 .pDepthStencilState = &depth_stencil_ci,
474 .pColorBlendState = &color_blend_ci, 798 .pColorBlendState = &color_blend_ci,
475 .pDynamicState = &dynamic_state_ci, 799 .pDynamicState = &dynamic_state_ci,
476 .layout = *layout, 800 .layout = *pipeline_layout,
477 .renderPass = renderpass, 801 .renderPass = render_pass,
478 .subpass = 0, 802 .subpass = 0,
479 .basePipelineHandle = nullptr, 803 .basePipelineHandle = nullptr,
480 .basePipelineIndex = 0, 804 .basePipelineIndex = 0,
481 }); 805 });
482} 806}
483 807
808void GraphicsPipeline::Validate() {
809 size_t num_images{};
810 for (const auto& info : stage_infos) {
811 for (const auto& desc : info.texture_buffer_descriptors) {
812 num_images += desc.count;
813 }
814 for (const auto& desc : info.image_buffer_descriptors) {
815 num_images += desc.count;
816 }
817 for (const auto& desc : info.texture_descriptors) {
818 num_images += desc.count;
819 }
820 for (const auto& desc : info.image_descriptors) {
821 num_images += desc.count;
822 }
823 }
824 ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
825}
826
484} // namespace Vulkan 827} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 8b6a98fe0..2bd48d697 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -1,30 +1,36 @@
1// Copyright 2019 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <array> 8#include <array>
8#include <optional> 9#include <atomic>
9#include <vector> 10#include <condition_variable>
11#include <mutex>
12#include <type_traits>
10 13
11#include "common/common_types.h" 14#include "common/thread_worker.h"
15#include "shader_recompiler/shader_info.h"
12#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 17#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
18#include "video_core/renderer_vulkan/vk_buffer_cache.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 19#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 20#include "video_core/renderer_vulkan/vk_texture_cache.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h" 21#include "video_core/vulkan_common/vulkan_wrapper.h"
17 22
18namespace Vulkan { 23namespace VideoCore {
24class ShaderNotify;
25}
19 26
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 27namespace Vulkan {
21 28
22struct GraphicsPipelineCacheKey { 29struct GraphicsPipelineCacheKey {
23 VkRenderPass renderpass; 30 std::array<u64, 6> unique_hashes;
24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; 31 FixedPipelineState state;
25 FixedPipelineState fixed_state;
26 32
27 std::size_t Hash() const noexcept; 33 size_t Hash() const noexcept;
28 34
29 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; 35 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
30 36
@@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey {
32 return !operator==(rhs); 38 return !operator==(rhs);
33 } 39 }
34 40
35 std::size_t Size() const noexcept { 41 size_t Size() const noexcept {
36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); 42 return sizeof(unique_hashes) + state.Size();
37 } 43 }
38}; 44};
39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); 45static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
40static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); 46static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
41static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); 47static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
42 48
49} // namespace Vulkan
50
51namespace std {
52template <>
53struct hash<Vulkan::GraphicsPipelineCacheKey> {
54 size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
55 return k.Hash();
56 }
57};
58} // namespace std
59
60namespace Vulkan {
61
43class Device; 62class Device;
44class VKDescriptorPool; 63class RenderPassCache;
45class VKScheduler; 64class VKScheduler;
46class VKUpdateDescriptorQueue; 65class VKUpdateDescriptorQueue;
47 66
48using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; 67class GraphicsPipeline {
68 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
49 69
50class VKGraphicsPipeline final {
51public: 70public:
52 explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, 71 explicit GraphicsPipeline(
53 VKDescriptorPool& descriptor_pool, 72 Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
54 VKUpdateDescriptorQueue& update_descriptor_queue_, 73 VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
55 const GraphicsPipelineCacheKey& key, 74 VideoCore::ShaderNotify* shader_notify, const Device& device,
56 vk::Span<VkDescriptorSetLayoutBinding> bindings, 75 DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
57 const SPIRVProgram& program, u32 num_color_buffers); 76 Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache,
58 ~VKGraphicsPipeline(); 77 const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
59 78 const std::array<const Shader::Info*, NUM_STAGES>& infos);
60 VkDescriptorSet CommitDescriptorSet(); 79
61 80 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
62 VkPipeline GetHandle() const { 81 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
63 return *pipeline; 82
83 GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
84 GraphicsPipeline(const GraphicsPipeline&) = delete;
85
86 void AddTransition(GraphicsPipeline* transition);
87
88 void Configure(bool is_indexed) {
89 configure_func(this, is_indexed);
64 } 90 }
65 91
66 VkPipelineLayout GetLayout() const { 92 [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
67 return *layout; 93 if (key == current_key) {
94 return this;
95 }
96 const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
97 return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
98 : nullptr;
68 } 99 }
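// Usage sketch for the transition API above (the cache helper named here is hypothetical, not
// part of this change): Next() resolves a previously recorded key-to-pipeline edge, and
// AddTransition() records a new edge when the lookup misses.
//
//     GraphicsPipeline* next = current->Next(new_key);
//     if (!next) {
//         next = LookUpOrBuildPipeline(new_key); // assumed cache-side helper
//         current->AddTransition(next);
//     }
//     next->Configure(is_indexed);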
69 100
70 GraphicsPipelineCacheKey GetCacheKey() const { 101 [[nodiscard]] bool IsBuilt() const noexcept {
71 return cache_key; 102 return is_built.load(std::memory_order::relaxed);
72 } 103 }
73 104
74private: 105 template <typename Spec>
75 vk::DescriptorSetLayout CreateDescriptorSetLayout( 106 static auto MakeConfigureSpecFunc() {
76 vk::Span<VkDescriptorSetLayoutBinding> bindings) const; 107 return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
108 }
77 109
78 vk::PipelineLayout CreatePipelineLayout() const; 110private:
111 template <typename Spec>
112 void ConfigureImpl(bool is_indexed);
79 113
80 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( 114 void ConfigureDraw();
81 const SPIRVProgram& program) const;
82 115
83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 116 void MakePipeline(VkRenderPass render_pass);
84 117
85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, 118 void Validate();
86 u32 num_color_buffers) const;
87 119
120 const GraphicsPipelineCacheKey key;
121 Tegra::Engines::Maxwell3D& maxwell3d;
122 Tegra::MemoryManager& gpu_memory;
88 const Device& device; 123 const Device& device;
124 TextureCache& texture_cache;
125 BufferCache& buffer_cache;
89 VKScheduler& scheduler; 126 VKScheduler& scheduler;
90 const GraphicsPipelineCacheKey cache_key; 127 VKUpdateDescriptorQueue& update_descriptor_queue;
91 const u64 hash; 128
129 void (*configure_func)(GraphicsPipeline*, bool){};
130
131 std::vector<GraphicsPipelineCacheKey> transition_keys;
132 std::vector<GraphicsPipeline*> transitions;
133
134 std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
135
136 std::array<Shader::Info, NUM_STAGES> stage_infos;
137 std::array<u32, 5> enabled_uniform_buffer_masks{};
138 VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
92 139
93 vk::DescriptorSetLayout descriptor_set_layout; 140 vk::DescriptorSetLayout descriptor_set_layout;
94 DescriptorAllocator descriptor_allocator; 141 DescriptorAllocator descriptor_allocator;
95 VKUpdateDescriptorQueue& update_descriptor_queue; 142 vk::PipelineLayout pipeline_layout;
96 vk::PipelineLayout layout; 143 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
97 vk::DescriptorUpdateTemplateKHR descriptor_template;
98 std::vector<vk::ShaderModule> modules;
99
100 vk::Pipeline pipeline; 144 vk::Pipeline pipeline;
145
146 std::condition_variable build_condvar;
147 std::mutex build_mutex;
148 std::atomic_bool is_built{false};
149 bool uses_push_descriptor{false};
101}; 150};
102 151
103} // namespace Vulkan 152} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index ee3cd35d0..4f8688118 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -39,9 +39,9 @@ public:
39 return KnownGpuTick() >= tick; 39 return KnownGpuTick() >= tick;
40 } 40 }
41 41
42 /// Advance to the logical tick. 42 /// Advance to the logical tick and return the old one
43 void NextTick() noexcept { 43 [[nodiscard]] u64 NextTick() noexcept {
44 ++current_tick; 44 return current_tick.fetch_add(1, std::memory_order::relaxed);
45 } 45 }
46 46
47 /// Refresh the known GPU tick 47 /// Refresh the known GPU tick
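// Sketch of the semantic change above, using only <atomic>: fetch_add returns the value held
// *before* the increment, so callers now receive the tick they just claimed while the counter
// still advances exactly once.
#include <atomic>
#include <cstdint>

std::atomic<std::uint64_t> current_tick{1};

[[nodiscard]] std::uint64_t NextTickExample() noexcept {
    return current_tick.fetch_add(1, std::memory_order::relaxed); // returns old value; counter becomes old + 1
}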
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..57b163247 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -4,444 +4,613 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <fstream>
7#include <memory> 8#include <memory>
9#include <thread>
8#include <vector> 10#include <vector>
9 11
10#include "common/bit_cast.h" 12#include "common/bit_cast.h"
11#include "common/cityhash.h" 13#include "common/cityhash.h"
14#include "common/fs/fs.h"
15#include "common/fs/path_util.h"
12#include "common/microprofile.h" 16#include "common/microprofile.h"
17#include "common/thread_worker.h"
13#include "core/core.h" 18#include "core/core.h"
14#include "core/memory.h" 19#include "core/memory.h"
20#include "shader_recompiler/backend/spirv/emit_spirv.h"
21#include "shader_recompiler/environment.h"
22#include "shader_recompiler/frontend/maxwell/control_flow.h"
23#include "shader_recompiler/frontend/maxwell/translate_program.h"
24#include "shader_recompiler/program_header.h"
25#include "video_core/dirty_flags.h"
15#include "video_core/engines/kepler_compute.h" 26#include "video_core/engines/kepler_compute.h"
16#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h" 28#include "video_core/memory_manager.h"
18#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 29#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
19#include "video_core/renderer_vulkan/maxwell_to_vk.h" 30#include "video_core/renderer_vulkan/maxwell_to_vk.h"
31#include "video_core/renderer_vulkan/pipeline_helper.h"
20#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 32#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 33#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 34#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 35#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_scheduler.h" 36#include "video_core/renderer_vulkan/vk_scheduler.h"
37#include "video_core/renderer_vulkan/vk_shader_util.h"
26#include "video_core/renderer_vulkan/vk_update_descriptor.h" 38#include "video_core/renderer_vulkan/vk_update_descriptor.h"
27#include "video_core/shader/compiler_settings.h"
28#include "video_core/shader/memory_util.h"
29#include "video_core/shader_cache.h" 39#include "video_core/shader_cache.h"
40#include "video_core/shader_environment.h"
30#include "video_core/shader_notify.h" 41#include "video_core/shader_notify.h"
31#include "video_core/vulkan_common/vulkan_device.h" 42#include "video_core/vulkan_common/vulkan_device.h"
32#include "video_core/vulkan_common/vulkan_wrapper.h" 43#include "video_core/vulkan_common/vulkan_wrapper.h"
33 44
34namespace Vulkan { 45namespace Vulkan {
35
36MICROPROFILE_DECLARE(Vulkan_PipelineCache); 46MICROPROFILE_DECLARE(Vulkan_PipelineCache);
37 47
38using Tegra::Engines::ShaderType;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
42using VideoCommon::Shader::ProgramCode;
43using VideoCommon::Shader::STAGE_MAIN_OFFSET;
44
45namespace { 48namespace {
49using Shader::Backend::SPIRV::EmitSPIRV;
50using Shader::Maxwell::MergeDualVertexPrograms;
51using Shader::Maxwell::TranslateProgram;
52using VideoCommon::ComputeEnvironment;
53using VideoCommon::FileEnvironment;
54using VideoCommon::GenericEnvironment;
55using VideoCommon::GraphicsEnvironment;
56
57constexpr u32 CACHE_VERSION = 5;
58
59template <typename Container>
60auto MakeSpan(Container& container) {
61 return std::span(container.data(), container.size());
62}
46 63
47constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; 64Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
48constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 65 switch (comparison) {
49constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; 66 case Maxwell::ComparisonOp::Never:
50constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 67 case Maxwell::ComparisonOp::NeverOld:
51constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; 68 return Shader::CompareFunction::Never;
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 69 case Maxwell::ComparisonOp::Less:
53 70 case Maxwell::ComparisonOp::LessOld:
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 71 return Shader::CompareFunction::Less;
55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile, 72 case Maxwell::ComparisonOp::Equal:
56 .disable_else_derivation = true, 73 case Maxwell::ComparisonOp::EqualOld:
57}; 74 return Shader::CompareFunction::Equal;
58 75 case Maxwell::ComparisonOp::LessEqual:
59constexpr std::size_t GetStageFromProgram(std::size_t program) { 76 case Maxwell::ComparisonOp::LessEqualOld:
60 return program == 0 ? 0 : program - 1; 77 return Shader::CompareFunction::LessThanEqual;
78 case Maxwell::ComparisonOp::Greater:
79 case Maxwell::ComparisonOp::GreaterOld:
80 return Shader::CompareFunction::Greater;
81 case Maxwell::ComparisonOp::NotEqual:
82 case Maxwell::ComparisonOp::NotEqualOld:
83 return Shader::CompareFunction::NotEqual;
84 case Maxwell::ComparisonOp::GreaterEqual:
85 case Maxwell::ComparisonOp::GreaterEqualOld:
86 return Shader::CompareFunction::GreaterThanEqual;
87 case Maxwell::ComparisonOp::Always:
88 case Maxwell::ComparisonOp::AlwaysOld:
89 return Shader::CompareFunction::Always;
90 }
91 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
92 return {};
61} 93}
62 94
63constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { 95Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
64 return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); 96 if (attr.enabled == 0) {
97 return Shader::AttributeType::Disabled;
98 }
99 switch (attr.Type()) {
100 case Maxwell::VertexAttribute::Type::SignedNorm:
101 case Maxwell::VertexAttribute::Type::UnsignedNorm:
102 case Maxwell::VertexAttribute::Type::UnsignedScaled:
103 case Maxwell::VertexAttribute::Type::SignedScaled:
104 case Maxwell::VertexAttribute::Type::Float:
105 return Shader::AttributeType::Float;
106 case Maxwell::VertexAttribute::Type::SignedInt:
107 return Shader::AttributeType::SignedInt;
108 case Maxwell::VertexAttribute::Type::UnsignedInt:
109 return Shader::AttributeType::UnsignedInt;
110 }
111 return Shader::AttributeType::Float;
65} 112}
66 113
67ShaderType GetShaderType(Maxwell::ShaderProgram program) { 114Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) {
68 switch (program) { 115 switch (state.DynamicAttributeType(index)) {
69 case Maxwell::ShaderProgram::VertexB: 116 case 0:
70 return ShaderType::Vertex; 117 return Shader::AttributeType::Disabled;
71 case Maxwell::ShaderProgram::TesselationControl: 118 case 1:
72 return ShaderType::TesselationControl; 119 return Shader::AttributeType::Float;
73 case Maxwell::ShaderProgram::TesselationEval: 120 case 2:
74 return ShaderType::TesselationEval; 121 return Shader::AttributeType::SignedInt;
75 case Maxwell::ShaderProgram::Geometry: 122 case 3:
76 return ShaderType::Geometry; 123 return Shader::AttributeType::UnsignedInt;
77 case Maxwell::ShaderProgram::Fragment:
78 return ShaderType::Fragment;
79 default:
80 UNIMPLEMENTED_MSG("program={}", program);
81 return ShaderType::Vertex;
82 } 124 }
125 return Shader::AttributeType::Disabled;
83} 126}
84 127
85template <VkDescriptorType descriptor_type, class Container> 128Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
86void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, 129 const GraphicsPipelineCacheKey& key,
87 VkShaderStageFlags stage_flags, const Container& container) { 130 const Shader::IR::Program& program,
88 const u32 num_entries = static_cast<u32>(std::size(container)); 131 const Shader::IR::Program* previous_program) {
89 for (std::size_t i = 0; i < num_entries; ++i) { 132 Shader::RuntimeInfo info;
90 u32 count = 1; 133 if (previous_program) {
91 if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { 134 info.previous_stage_stores = previous_program->info.stores;
92 // Combined image samplers can be arrayed. 135 if (previous_program->is_geometry_passthrough) {
93 count = container[i].size; 136 info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
94 } 137 }
95 bindings.push_back({ 138 } else {
96 .binding = binding++, 139 info.previous_stage_stores.mask.set();
97 .descriptorType = descriptor_type, 140 }
98 .descriptorCount = count, 141 const Shader::Stage stage{program.stage};
99 .stageFlags = stage_flags, 142 const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough};
100 .pImmutableSamplers = nullptr, 143 const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
101 }); 144 const float point_size{Common::BitCast<float>(key.state.point_size)};
145 switch (stage) {
146 case Shader::Stage::VertexB:
147 if (!has_geometry) {
148 if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
149 info.fixed_state_point_size = point_size;
150 }
151 if (key.state.xfb_enabled) {
152 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
153 }
154 info.convert_depth_mode = gl_ndc;
155 }
156 if (key.state.dynamic_vertex_input) {
157 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
158 info.generic_input_types[index] = AttributeType(key.state, index);
159 }
160 } else {
161 std::ranges::transform(key.state.attributes, info.generic_input_types.begin(),
162 &CastAttributeType);
163 }
164 break;
165 case Shader::Stage::TessellationEval:
166 // We have to flip tessellation clockwise for some reason...
167 info.tess_clockwise = key.state.tessellation_clockwise == 0;
168 info.tess_primitive = [&key] {
169 const u32 raw{key.state.tessellation_primitive.Value()};
170 switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
171 case Maxwell::TessellationPrimitive::Isolines:
172 return Shader::TessPrimitive::Isolines;
173 case Maxwell::TessellationPrimitive::Triangles:
174 return Shader::TessPrimitive::Triangles;
175 case Maxwell::TessellationPrimitive::Quads:
176 return Shader::TessPrimitive::Quads;
177 }
178 UNREACHABLE();
179 return Shader::TessPrimitive::Triangles;
180 }();
181 info.tess_spacing = [&] {
182 const u32 raw{key.state.tessellation_spacing};
183 switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
184 case Maxwell::TessellationSpacing::Equal:
185 return Shader::TessSpacing::Equal;
186 case Maxwell::TessellationSpacing::FractionalOdd:
187 return Shader::TessSpacing::FractionalOdd;
188 case Maxwell::TessellationSpacing::FractionalEven:
189 return Shader::TessSpacing::FractionalEven;
190 }
191 UNREACHABLE();
192 return Shader::TessSpacing::Equal;
193 }();
194 break;
195 case Shader::Stage::Geometry:
196 if (program.output_topology == Shader::OutputTopology::PointList) {
197 info.fixed_state_point_size = point_size;
198 }
199 if (key.state.xfb_enabled != 0) {
200 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
201 }
202 info.convert_depth_mode = gl_ndc;
203 break;
204 case Shader::Stage::Fragment:
205 info.alpha_test_func = MaxwellToCompareFunction(
206 key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
207 info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref);
208 break;
209 default:
210 break;
211 }
212 switch (key.state.topology) {
213 case Maxwell::PrimitiveTopology::Points:
214 info.input_topology = Shader::InputTopology::Points;
215 break;
216 case Maxwell::PrimitiveTopology::Lines:
217 case Maxwell::PrimitiveTopology::LineLoop:
218 case Maxwell::PrimitiveTopology::LineStrip:
219 info.input_topology = Shader::InputTopology::Lines;
220 break;
221 case Maxwell::PrimitiveTopology::Triangles:
222 case Maxwell::PrimitiveTopology::TriangleStrip:
223 case Maxwell::PrimitiveTopology::TriangleFan:
224 case Maxwell::PrimitiveTopology::Quads:
225 case Maxwell::PrimitiveTopology::QuadStrip:
226 case Maxwell::PrimitiveTopology::Polygon:
227 case Maxwell::PrimitiveTopology::Patches:
228 info.input_topology = Shader::InputTopology::Triangles;
229 break;
230 case Maxwell::PrimitiveTopology::LinesAdjacency:
231 case Maxwell::PrimitiveTopology::LineStripAdjacency:
232 info.input_topology = Shader::InputTopology::LinesAdjacency;
233 break;
234 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
235 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
236 info.input_topology = Shader::InputTopology::TrianglesAdjacency;
237 break;
102 } 238 }
239 info.force_early_z = key.state.early_z != 0;
240 info.y_negate = key.state.y_negate != 0;
241 return info;
103} 242}
243} // Anonymous namespace
104 244
105u32 FillDescriptorLayout(const ShaderEntries& entries, 245size_t ComputePipelineCacheKey::Hash() const noexcept {
106 std::vector<VkDescriptorSetLayoutBinding>& bindings, 246 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
107 Maxwell::ShaderProgram program_type, u32 base_binding) { 247 return static_cast<size_t>(hash);
108 const ShaderType stage = GetStageFromProgram(program_type);
109 const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
110
111 u32 binding = base_binding;
112 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
113 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
114 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
115 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
116 AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
117 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
118 return binding;
119} 248}
120 249
121} // Anonymous namespace 250bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
251 return std::memcmp(&rhs, this, sizeof *this) == 0;
252}
122 253
123std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { 254size_t GraphicsPipelineCacheKey::Hash() const noexcept {
124 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); 255 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
125 return static_cast<std::size_t>(hash); 256 return static_cast<size_t>(hash);
126} 257}
127 258
128bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { 259bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
129 return std::memcmp(&rhs, this, Size()) == 0; 260 return std::memcmp(&rhs, this, Size()) == 0;
130} 261}
131 262
132std::size_t ComputePipelineCacheKey::Hash() const noexcept { 263PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
133 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); 264 Tegra::Engines::KeplerCompute& kepler_compute_,
134 return static_cast<std::size_t>(hash); 265 Tegra::MemoryManager& gpu_memory_, const Device& device_,
135} 266 VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
136 267 VKUpdateDescriptorQueue& update_descriptor_queue_,
137bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { 268 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
138 return std::memcmp(&rhs, this, sizeof *this) == 0; 269 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
270 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
271 device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
272 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
273 buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
274 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
275 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
276 serialization_thread(1, "yuzu:PipelineSerialization") {
277 const auto& float_control{device.FloatControlProperties()};
278 const VkDriverIdKHR driver_id{device.GetDriverID()};
279 profile = Shader::Profile{
280 .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
281 .unified_descriptor_binding = true,
282 .support_descriptor_aliasing = true,
283 .support_int8 = true,
284 .support_int16 = device.IsShaderInt16Supported(),
285 .support_int64 = device.IsShaderInt64Supported(),
286 .support_vertex_instance_id = false,
287 .support_float_controls = true,
288 .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
289 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
290 .support_separate_rounding_mode =
291 float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
292 .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
293 .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
294 .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
295 .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
296 .support_fp16_signed_zero_nan_preserve =
297 float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
298 .support_fp32_signed_zero_nan_preserve =
299 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
300 .support_fp64_signed_zero_nan_preserve =
301 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
302 .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
303 .support_vote = true,
304 .support_viewport_index_layer_non_geometry =
305 device.IsExtShaderViewportIndexLayerSupported(),
306 .support_viewport_mask = device.IsNvViewportArray2Supported(),
307 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
308 .support_demote_to_helper_invocation = true,
309 .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
310 .support_derivative_control = true,
311 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
312
313 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
314
315 .lower_left_origin_mode = false,
316 .need_declared_frag_colors = false,
317
318 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
319 .has_broken_unsigned_image_offsets = false,
320 .has_broken_signed_operations = false,
321 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
322 .ignore_nan_fp_comparisons = false,
323 };
324 host_info = Shader::HostTranslateInfo{
325 .support_float16 = device.IsFloat16Supported(),
326 .support_int64 = device.IsShaderInt64Supported(),
327 };
139} 328}
140 329
141Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, 330PipelineCache::~PipelineCache() = default;
142 GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
143 : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
144 shader_ir(program_code, main_offset_, compiler_settings, registry),
145 entries(GenerateShaderEntries(shader_ir)) {}
146
147Shader::~Shader() = default;
148
149VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
150 Tegra::Engines::Maxwell3D& maxwell3d_,
151 Tegra::Engines::KeplerCompute& kepler_compute_,
152 Tegra::MemoryManager& gpu_memory_, const Device& device_,
153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
154 VKUpdateDescriptorQueue& update_descriptor_queue_)
155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
158 update_descriptor_queue_} {}
159
160VKPipelineCache::~VKPipelineCache() = default;
161 331
162std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 332GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
163 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; 333 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
164
165 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
166 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
167
168 // Skip stages that are not enabled
169 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
170 continue;
171 }
172
173 const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
174 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
175 ASSERT(cpu_addr);
176
177 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
178 if (!result) {
179 const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
180
181 // No shader found - create a new one
182 static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
183 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
184 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
185 const std::size_t size_in_bytes = code.size() * sizeof(u64);
186
187 auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
188 std::move(code), stage_offset);
189 result = shader.get();
190 334
191 if (cpu_addr) { 335 if (!RefreshStages(graphics_key.unique_hashes)) {
192 Register(std::move(shader), *cpu_addr, size_in_bytes); 336 current_pipeline = nullptr;
193 } else { 337 return nullptr;
194 null_shader = std::move(shader); 338 }
195 } 339 graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
340 device.IsExtVertexInputDynamicStateSupported());
341
342 if (current_pipeline) {
343 GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
344 if (next) {
345 current_pipeline = next;
346 return BuiltPipeline(current_pipeline);
196 } 347 }
197 shaders[index] = result;
198 } 348 }
199 return last_shaders = shaders; 349 return CurrentGraphicsPipelineSlowPath();
200} 350}
201 351
202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( 352ComputePipeline* PipelineCache::CurrentComputePipeline() {
203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
205 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 353 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
206 354
207 if (last_graphics_pipeline && last_graphics_key == key) { 355 const ShaderInfo* const shader{ComputeShader()};
208 return last_graphics_pipeline; 356 if (!shader) {
209 } 357 return nullptr;
210 last_graphics_key = key;
211
212 if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
213 std::unique_lock lock{pipeline_cache};
214 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
215 if (is_cache_miss) {
216 gpu.ShaderNotify().MarkSharderBuilding();
217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
220 update_descriptor_queue, bindings, program, key,
221 num_color_buffers);
222 }
223 last_graphics_pipeline = pair->second.get();
224 return last_graphics_pipeline;
225 } 358 }
226 359 const auto& qmd{kepler_compute.launch_description};
227 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); 360 const ComputePipelineCacheKey key{
228 auto& entry = pair->second; 361 .unique_hash = shader->unique_hash,
229 if (is_cache_miss) { 362 .shared_memory_size = qmd.shared_alloc,
230 gpu.ShaderNotify().MarkSharderBuilding(); 363 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 364 };
232 const auto [program, bindings] = DecompileShaders(key.fixed_state); 365 const auto [pair, is_new]{compute_cache.try_emplace(key)};
233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 366 auto& pipeline{pair->second};
234 update_descriptor_queue, key, bindings, 367 if (!is_new) {
235 program, num_color_buffers); 368 return pipeline.get();
236 gpu.ShaderNotify().MarkShaderComplete();
237 } 369 }
238 last_graphics_pipeline = entry.get(); 370 pipeline = CreateComputePipeline(key, shader);
239 return last_graphics_pipeline; 371 return pipeline.get();
240} 372}
241 373
242VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { 374void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
243 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 375 const VideoCore::DiskResourceLoadCallback& callback) {
244 376 if (title_id == 0) {
245 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); 377 return;
246 auto& entry = pair->second;
247 if (!is_cache_miss) {
248 return *entry;
249 } 378 }
250 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 379 const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
251 380 const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
252 const GPUVAddr gpu_addr = key.shader; 381 if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
253 382 LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
254 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 383 return;
255 ASSERT(cpu_addr); 384 }
385 pipeline_cache_filename = base_dir / "vulkan.bin";
386
387 struct {
388 std::mutex mutex;
389 size_t total{};
390 size_t built{};
391 bool has_loaded{};
392 } state;
393
394 const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
395 ComputePipelineCacheKey key;
396 file.read(reinterpret_cast<char*>(&key), sizeof(key));
397
398 workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
399 ShaderPools pools;
400 auto pipeline{CreateComputePipeline(pools, key, env, false)};
401 std::lock_guard lock{state.mutex};
402 if (pipeline) {
403 compute_cache.emplace(key, std::move(pipeline));
404 }
405 ++state.built;
406 if (state.has_loaded) {
407 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
408 }
409 });
410 ++state.total;
411 }};
412 const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
413 const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
414 const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
415 GraphicsPipelineCacheKey key;
416 file.read(reinterpret_cast<char*>(&key), sizeof(key));
417
418 if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
419 (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
420 return;
421 }
422 workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
423 ShaderPools pools;
424 boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
425 for (auto& env : envs) {
426 env_ptrs.push_back(&env);
427 }
428 auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
256 429
257 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); 430 std::lock_guard lock{state.mutex};
258 if (!shader) { 431 graphics_cache.emplace(key, std::move(pipeline));
259 // No shader found - create a new one 432 ++state.built;
260 const auto host_ptr = gpu_memory.GetPointer(gpu_addr); 433 if (state.has_loaded) {
434 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
435 }
436 });
437 ++state.total;
438 }};
439 VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
440 load_graphics);
261 441
262 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); 442 std::unique_lock lock{state.mutex};
263 const std::size_t size_in_bytes = code.size() * sizeof(u64); 443 callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
444 state.has_loaded = true;
445 lock.unlock();
264 446
265 auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, 447 workers.WaitForRequests();
266 *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); 448}
267 shader = shader_info.get();
268 449
269 if (cpu_addr) { 450GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
270 Register(std::move(shader_info), *cpu_addr, size_in_bytes); 451 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
271 } else { 452 auto& pipeline{pair->second};
272 null_kernel = std::move(shader_info); 453 if (is_new) {
273 } 454 pipeline = CreateGraphicsPipeline();
274 } 455 }
275 456 if (!pipeline) {
276 const Specialization specialization{ 457 return nullptr;
277 .base_binding = 0, 458 }
278 .workgroup_size = key.workgroup_size, 459 if (current_pipeline) {
279 .shared_memory_size = key.shared_memory_size, 460 current_pipeline->AddTransition(pipeline.get());
280 .point_size = std::nullopt, 461 }
281 .enabled_attributes = {}, 462 current_pipeline = pipeline.get();
282 .attribute_types = {}, 463 return BuiltPipeline(current_pipeline);
283 .ndc_minus_one_to_one = false,
284 };
285 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
286 shader->GetRegistry(), specialization),
287 shader->GetEntries()};
288 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
289 update_descriptor_queue, spirv_shader);
290 return *entry;
291} 464}
292 465
293void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { 466GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
294 gpu.ShaderNotify().MarkShaderComplete(); 467 if (pipeline->IsBuilt()) {
295 std::unique_lock lock{pipeline_cache}; 468 return pipeline;
296 graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); 469 }
470 if (!use_asynchronous_shaders) {
471 return pipeline;
472 }
473 // If something is using depth, we can assume that games are not rendering anything which
474 // will be used one time.
475 if (maxwell3d.regs.zeta_enable) {
476 return nullptr;
477 }
478 // If games are using a small index count, we can assume these are full screen quads.
479 // Usually these shaders are only used once for building textures so we can assume they
480 // can't be built async
481 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
482 return pipeline;
483 }
484 return nullptr;
297} 485}
298 486
299void VKPipelineCache::OnShaderRemoval(Shader* shader) { 487std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
300 bool finished = false; 488 ShaderPools& pools, const GraphicsPipelineCacheKey& key,
301 const auto Finish = [&] { 489 std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
302 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and 490 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
303 // flush. 491 size_t env_index{0};
304 if (finished) { 492 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
305 return; 493 const bool uses_vertex_a{key.unique_hashes[0] != 0};
306 } 494 const bool uses_vertex_b{key.unique_hashes[1] != 0};
307 finished = true; 495 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
308 scheduler.Finish(); 496 if (key.unique_hashes[index] == 0) {
309 };
310
311 const GPUVAddr invalidated_addr = shader->GetGpuAddr();
312 for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
313 auto& entry = it->first;
314 if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
315 entry.shaders.end()) {
316 ++it;
317 continue; 497 continue;
318 } 498 }
319 Finish(); 499 Shader::Environment& env{*envs[env_index]};
320 it = graphics_cache.erase(it); 500 ++env_index;
501
502 const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
503 Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
504 if (!uses_vertex_a || index != 1) {
505 // Normal path
506 programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
507 } else {
508 // VertexB path when VertexA is present.
509 auto& program_va{programs[0]};
510 auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
511 programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
512 }
321 } 513 }
322 for (auto it = compute_cache.begin(); it != compute_cache.end();) { 514 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
323 auto& entry = it->first; 515 std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
324 if (entry.shader != invalidated_addr) { 516
325 ++it; 517 const Shader::IR::Program* previous_stage{};
518 Shader::Backend::Bindings binding;
519 for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
520 ++index) {
521 if (key.unique_hashes[index] == 0) {
326 continue; 522 continue;
327 } 523 }
328 Finish(); 524 UNIMPLEMENTED_IF(index == 0);
329 it = compute_cache.erase(it); 525
526 Shader::IR::Program& program{programs[index]};
527 const size_t stage_index{index - 1};
528 infos[stage_index] = &program.info;
529
530 const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
531 const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
532 device.SaveShader(code);
533 modules[stage_index] = BuildShader(device, code);
534 if (device.HasDebuggingToolAttached()) {
535 const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
536 modules[stage_index].SetObjectNameEXT(name.c_str());
537 }
538 previous_stage = &program;
330 } 539 }
540 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
541 return std::make_unique<GraphicsPipeline>(
542 maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
543 descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
544 std::move(modules), infos);
545
546} catch (const Shader::Exception& exception) {
547 LOG_ERROR(Render_Vulkan, "{}", exception.what());
548 return nullptr;
331} 549}
332 550
333std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 551std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
334VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { 552 GraphicsEnvironments environments;
335 Specialization specialization; 553 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
336 if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
337 float point_size;
338 std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
339 specialization.point_size = point_size;
340 ASSERT(point_size != 0.0f);
341 }
342 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
343 const auto& attribute = fixed_state.attributes[i];
344 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
345 specialization.attribute_types[i] = attribute.Type();
346 }
347 specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
348 specialization.early_fragment_tests = fixed_state.early_z;
349
350 // Alpha test
351 specialization.alpha_test_func =
352 FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
353 specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
354
355 SPIRVProgram program;
356 std::vector<VkDescriptorSetLayoutBinding> bindings;
357 554
358 for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { 555 main_pools.ReleaseContents();
359 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); 556 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
360 // Skip stages that are not enabled 557 if (!pipeline || pipeline_cache_filename.empty()) {
361 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { 558 return pipeline;
362 continue;
363 }
364 const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
365 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
366 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
367
368 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
369 const ShaderType program_type = GetShaderType(program_enum);
370 const auto& entries = shader->GetEntries();
371 program[stage] = {
372 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
373 entries,
374 };
375
376 const u32 old_binding = specialization.base_binding;
377 specialization.base_binding =
378 FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
379 ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
380 } 559 }
381 return {std::move(program), std::move(bindings)}; 560 serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
382} 561 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
383 562 env_ptrs;
384template <VkDescriptorType descriptor_type, class Container> 563 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
385void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, 564 if (key.unique_hashes[index] != 0) {
386 u32& offset, const Container& container) { 565 env_ptrs.push_back(&envs[index]);
387 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); 566 }
388 const u32 count = static_cast<u32>(std::size(container));
389
390 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
391 for (u32 i = 0; i < count; ++i) {
392 const u32 num_samplers = container[i].size;
393 template_entries.push_back({
394 .dstBinding = binding,
395 .dstArrayElement = 0,
396 .descriptorCount = num_samplers,
397 .descriptorType = descriptor_type,
398 .offset = offset,
399 .stride = entry_size,
400 });
401
402 ++binding;
403 offset += num_samplers * entry_size;
404 } 567 }
405 return; 568 SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
406 } 569 });
570 return pipeline;
571}
407 572
408 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || 573std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
409 descriptor_type == STORAGE_TEXEL_BUFFER) { 574 const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
410 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 575 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
411 // Note: Fixed in driver Windows 443.24, Linux 440.66.15 576 const auto& qmd{kepler_compute.launch_description};
412 for (u32 i = 0; i < count; ++i) { 577 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
413 template_entries.push_back({ 578 env.SetCachedSize(shader->size_bytes);
414 .dstBinding = binding + i, 579
415 .dstArrayElement = 0, 580 main_pools.ReleaseContents();
416 .descriptorCount = 1, 581 auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
417 .descriptorType = descriptor_type, 582 if (!pipeline || pipeline_cache_filename.empty()) {
418 .offset = static_cast<std::size_t>(offset + i * entry_size), 583 return pipeline;
419 .stride = entry_size,
420 });
421 }
422 } else if (count > 0) {
423 template_entries.push_back({
424 .dstBinding = binding,
425 .dstArrayElement = 0,
426 .descriptorCount = count,
427 .descriptorType = descriptor_type,
428 .offset = offset,
429 .stride = entry_size,
430 });
431 } 584 }
432 offset += count * entry_size; 585 serialization_thread.QueueWork([this, key, env = std::move(env)] {
433 binding += count; 586 SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
587 pipeline_cache_filename, CACHE_VERSION);
588 });
589 return pipeline;
434} 590}
435 591
436void FillDescriptorUpdateTemplateEntries( 592std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
437 const ShaderEntries& entries, u32& binding, u32& offset, 593 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
438 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { 594 bool build_in_parallel) try {
439 AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); 595 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
440 AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); 596
441 AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); 597 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
442 AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); 598 auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
443 AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); 599 const std::vector<u32> code{EmitSPIRV(profile, program)};
444 AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); 600 device.SaveShader(code);
601 vk::ShaderModule spv_module{BuildShader(device, code)};
602 if (device.HasDebuggingToolAttached()) {
603 const auto name{fmt::format("Shader {:016x}", key.unique_hash)};
604 spv_module.SetObjectNameEXT(name.c_str());
605 }
606 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
607 return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
608 thread_worker, &shader_notify, program.info,
609 std::move(spv_module));
610
611} catch (const Shader::Exception& exception) {
612 LOG_ERROR(Render_Vulkan, "{}", exception.what());
613 return nullptr;
445} 614}
446 615
447} // namespace Vulkan 616} // namespace Vulkan
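The Hash and operator== implementations above treat each pipeline cache key as an opaque block of bytes: Common::CityHash64 runs over sizeof *this and equality is a memcmp over the same bytes. The sketch below illustrates that pattern with a hypothetical key and FNV-1a standing in for CityHash64; it is only sound when every byte of the key, padding included, is deterministically initialized, which is what the trivially-constructible assertions on the real key types are guarding.

    // Minimal sketch of the byte-wise key hashing pattern, assuming a layout similar to
    // ComputePipelineCacheKey. ExampleCacheKey and the FNV-1a constants are illustrative
    // stand-ins for the real key types and Common::CityHash64.
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    struct ExampleCacheKey {
        std::uint64_t unique_hash;
        std::uint32_t shared_memory_size;
        std::array<std::uint32_t, 3> workgroup_size;

        std::size_t Hash() const noexcept {
            // Hash the object's raw bytes, mirroring the CityHash64(this, sizeof *this) call
            const auto* bytes = reinterpret_cast<const unsigned char*>(this);
            std::uint64_t hash = 1469598103934665603ull; // FNV offset basis
            for (std::size_t i = 0; i < sizeof *this; ++i) {
                hash = (hash ^ bytes[i]) * 1099511628211ull; // FNV prime
            }
            return static_cast<std::size_t>(hash);
        }

        bool operator==(const ExampleCacheKey& rhs) const noexcept {
            // Byte-wise equality matches the byte-wise hash
            return std::memcmp(this, &rhs, sizeof *this) == 0;
        }
    };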
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 89d635a3d..efe5a7ed8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,24 +6,28 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <filesystem>
10#include <iosfwd>
9#include <memory> 11#include <memory>
10#include <type_traits> 12#include <type_traits>
11#include <unordered_map> 13#include <unordered_map>
12#include <utility> 14#include <utility>
13#include <vector> 15#include <vector>
14 16
15#include <boost/functional/hash.hpp>
16
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "common/thread_worker.h"
19#include "shader_recompiler/frontend/ir/basic_block.h"
20#include "shader_recompiler/frontend/ir/value.h"
21#include "shader_recompiler/frontend/maxwell/control_flow.h"
22#include "shader_recompiler/host_translate_info.h"
23#include "shader_recompiler/object_pool.h"
24#include "shader_recompiler/profile.h"
19#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 26#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
27#include "video_core/renderer_vulkan/vk_buffer_cache.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 29#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 30#include "video_core/renderer_vulkan/vk_texture_cache.h"
23#include "video_core/shader/async_shaders.h"
24#include "video_core/shader/memory_util.h"
25#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader_cache.h" 31#include "video_core/shader_cache.h"
28#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
29 33
@@ -31,23 +35,24 @@ namespace Core {
31class System; 35class System;
32} 36}
33 37
34namespace Vulkan { 38namespace Shader::IR {
39struct Program;
40}
35 41
36class Device; 42namespace VideoCore {
37class RasterizerVulkan; 43class ShaderNotify;
38class VKComputePipeline; 44}
39class VKDescriptorPool; 45
40class VKScheduler; 46namespace Vulkan {
41class VKUpdateDescriptorQueue;
42 47
43using Maxwell = Tegra::Engines::Maxwell3D::Regs; 48using Maxwell = Tegra::Engines::Maxwell3D::Regs;
44 49
45struct ComputePipelineCacheKey { 50struct ComputePipelineCacheKey {
46 GPUVAddr shader; 51 u64 unique_hash;
47 u32 shared_memory_size; 52 u32 shared_memory_size;
48 std::array<u32, 3> workgroup_size; 53 std::array<u32, 3> workgroup_size;
49 54
50 std::size_t Hash() const noexcept; 55 size_t Hash() const noexcept;
51 56
52 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; 57 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
53 58
@@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
64namespace std { 69namespace std {
65 70
66template <> 71template <>
67struct hash<Vulkan::GraphicsPipelineCacheKey> {
68 std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
69 return k.Hash();
70 }
71};
72
73template <>
74struct hash<Vulkan::ComputePipelineCacheKey> { 72struct hash<Vulkan::ComputePipelineCacheKey> {
75 std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { 73 size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
76 return k.Hash(); 74 return k.Hash();
77 } 75 }
78}; 76};
@@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
81 79
82namespace Vulkan { 80namespace Vulkan {
83 81
84class Shader { 82class ComputePipeline;
85public: 83class Device;
86 explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, 84class DescriptorPool;
87 Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, 85class RasterizerVulkan;
88 VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); 86class RenderPassCache;
89 ~Shader(); 87class VKScheduler;
90 88class VKUpdateDescriptorQueue;
91 GPUVAddr GetGpuAddr() const {
92 return gpu_addr;
93 }
94
95 VideoCommon::Shader::ShaderIR& GetIR() {
96 return shader_ir;
97 }
98
99 const VideoCommon::Shader::ShaderIR& GetIR() const {
100 return shader_ir;
101 }
102 89
103 const VideoCommon::Shader::Registry& GetRegistry() const { 90using VideoCommon::ShaderInfo;
104 return registry;
105 }
106 91
107 const ShaderEntries& GetEntries() const { 92struct ShaderPools {
108 return entries; 93 void ReleaseContents() {
94 flow_block.ReleaseContents();
95 block.ReleaseContents();
96 inst.ReleaseContents();
109 } 97 }
110 98
111private: 99 Shader::ObjectPool<Shader::IR::Inst> inst;
112 GPUVAddr gpu_addr{}; 100 Shader::ObjectPool<Shader::IR::Block> block;
113 VideoCommon::Shader::ProgramCode program_code; 101 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
114 VideoCommon::Shader::Registry registry;
115 VideoCommon::Shader::ShaderIR shader_ir;
116 ShaderEntries entries;
117}; 102};
118 103
119class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { 104class PipelineCache : public VideoCommon::ShaderCache {
120public: 105public:
121 explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, 106 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
122 Tegra::Engines::Maxwell3D& maxwell3d, 107 Tegra::Engines::KeplerCompute& kepler_compute,
123 Tegra::Engines::KeplerCompute& kepler_compute, 108 Tegra::MemoryManager& gpu_memory, const Device& device,
124 Tegra::MemoryManager& gpu_memory, const Device& device, 109 VKScheduler& scheduler, DescriptorPool& descriptor_pool,
125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, 110 VKUpdateDescriptorQueue& update_descriptor_queue,
126 VKUpdateDescriptorQueue& update_descriptor_queue); 111 RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
127 ~VKPipelineCache() override; 112 TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
113 ~PipelineCache();
114
115 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
128 116
129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 117 [[nodiscard]] ComputePipeline* CurrentComputePipeline();
130 118
131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 119 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
132 u32 num_color_buffers, 120 const VideoCore::DiskResourceLoadCallback& callback);
133 VideoCommon::Shader::AsyncShaders& async_shaders);
134 121
135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 122private:
123 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
136 124
137 void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); 125 [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
138 126
139protected: 127 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
140 void OnShaderRemoval(Shader* shader) final;
141 128
142private: 129 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
143 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 130 ShaderPools& pools, const GraphicsPipelineCacheKey& key,
144 const FixedPipelineState& fixed_state); 131 std::span<Shader::Environment* const> envs, bool build_in_parallel);
145 132
146 Tegra::GPU& gpu; 133 std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
147 Tegra::Engines::Maxwell3D& maxwell3d; 134 const ShaderInfo* shader);
148 Tegra::Engines::KeplerCompute& kepler_compute; 135
149 Tegra::MemoryManager& gpu_memory; 136 std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
137 const ComputePipelineCacheKey& key,
138 Shader::Environment& env,
139 bool build_in_parallel);
150 140
151 const Device& device; 141 const Device& device;
152 VKScheduler& scheduler; 142 VKScheduler& scheduler;
153 VKDescriptorPool& descriptor_pool; 143 DescriptorPool& descriptor_pool;
154 VKUpdateDescriptorQueue& update_descriptor_queue; 144 VKUpdateDescriptorQueue& update_descriptor_queue;
145 RenderPassCache& render_pass_cache;
146 BufferCache& buffer_cache;
147 TextureCache& texture_cache;
148 VideoCore::ShaderNotify& shader_notify;
149 bool use_asynchronous_shaders{};
155 150
156 std::unique_ptr<Shader> null_shader; 151 GraphicsPipelineCacheKey graphics_key{};
157 std::unique_ptr<Shader> null_kernel; 152 GraphicsPipeline* current_pipeline{};
158 153
159 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; 154 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
155 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
160 156
161 GraphicsPipelineCacheKey last_graphics_key; 157 ShaderPools main_pools;
162 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
163 158
164 std::mutex pipeline_cache; 159 Shader::Profile profile;
165 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> 160 Shader::HostTranslateInfo host_info;
166 graphics_cache;
167 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
168};
169 161
170void FillDescriptorUpdateTemplateEntries( 162 std::filesystem::path pipeline_cache_filename;
171 const ShaderEntries& entries, u32& binding, u32& offset, 163
172 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); 164 Common::ThreadWorker workers;
165 Common::ThreadWorker serialization_thread;
166};
173 167
174} // namespace Vulkan 168} // namespace Vulkan
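The header above drops the std::hash specialization for GraphicsPipelineCacheKey and keeps the one for ComputePipelineCacheKey; each such specialization simply forwards to the key's member Hash(). That forwarding is what lets graphics_cache and compute_cache be plain std::unordered_map containers keyed directly by the cache keys. A self-contained sketch of the pattern, with ExampleKey as a hypothetical stand-in:

    // Forwarding std::hash specialization plus the try_emplace lookup style used by the
    // pipeline cache. ExampleKey and its Hash() are illustrative only.
    #include <cstddef>
    #include <functional>
    #include <memory>
    #include <unordered_map>

    struct ExampleKey {
        std::size_t value{};

        std::size_t Hash() const noexcept {
            return value * 0x9E3779B97F4A7C15ull;
        }
        bool operator==(const ExampleKey&) const noexcept = default;
    };

    namespace std {
    template <>
    struct hash<ExampleKey> {
        std::size_t operator()(const ExampleKey& k) const noexcept {
            return k.Hash();
        }
    };
    } // namespace std

    int main() {
        std::unordered_map<ExampleKey, std::unique_ptr<int>> cache;
        const auto [it, is_new] = cache.try_emplace(ExampleKey{.value = 42});
        if (is_new) {
            it->second = std::make_unique<int>(1); // build only on a cache miss
        }
        return 0;
    }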
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cadd5147..c9cb32d71 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -114,14 +114,10 @@ void HostCounter::EndQuery() {
114} 114}
115 115
116u64 HostCounter::BlockingQuery() const { 116u64 HostCounter::BlockingQuery() const {
117 if (tick >= cache.GetScheduler().CurrentTick()) { 117 cache.GetScheduler().Wait(tick);
118 cache.GetScheduler().Flush();
119 }
120
121 u64 data; 118 u64 data;
122 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( 119 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
123 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 120 query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT);
124 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
125 121
126 switch (query_result) { 122 switch (query_result) {
127 case VK_SUCCESS: 123 case VK_SUCCESS:
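In BlockingQuery above, the explicit flush plus VK_QUERY_RESULT_WAIT_BIT is replaced by a host-side wait on the scheduler tick before the query result is read, so the read itself no longer needs the wait bit. A rough sketch of such a tick-wait primitive, under the assumption that the scheduler stamps submitted work with a monotonically increasing tick and signals completion from its worker (names here are hypothetical):

    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    class TickWaiter {
    public:
        // Called from the completion path once work up to 'tick' has finished on the GPU.
        void SignalCompleted(std::uint64_t tick) {
            {
                std::lock_guard lock{mutex};
                completed_tick = tick;
            }
            cv.notify_all();
        }

        // Blocks until the given tick has completed, mirroring the Wait(tick) call above.
        void Wait(std::uint64_t tick) {
            std::unique_lock lock{mutex};
            cv.wait(lock, [this, tick] { return completed_tick >= tick; });
        }

    private:
        std::mutex mutex;
        std::condition_variable cv;
        std::uint64_t completed_tick{};
    };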
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f57c15b37..c7a07fdd8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,7 +24,6 @@
24#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
25#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
26#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
28#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 27#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
29#include "video_core/renderer_vulkan/vk_rasterizer.h" 28#include "video_core/renderer_vulkan/vk_rasterizer.h"
30#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -55,11 +54,10 @@ struct DrawParams {
55 u32 num_instances; 54 u32 num_instances;
56 u32 base_vertex; 55 u32 base_vertex;
57 u32 num_vertices; 56 u32 num_vertices;
57 u32 first_index;
58 bool is_indexed; 58 bool is_indexed;
59}; 59};
60 60
61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
62
63VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { 61VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
64 const auto& src = regs.viewport_transform[index]; 62 const auto& src = regs.viewport_transform[index];
65 const float width = src.scale_x * 2.0f; 63 const float width = src.scale_x * 2.0f;
@@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
97 return scissor; 95 return scissor;
98} 96}
99 97
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
105 }
106 return addresses;
107}
108
109struct TextureHandle {
110 constexpr TextureHandle(u32 data, bool via_header_index) {
111 const Tegra::Texture::TextureHandle handle{data};
112 image = handle.tic_id;
113 sampler = via_header_index ? image : handle.tsc_id.Value();
114 }
115
116 u32 image;
117 u32 sampler;
118};
119
120template <typename Engine, typename Entry>
121TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
122 size_t stage, size_t index = 0) {
123 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
124 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
125 if (entry.is_separated) {
126 const u32 buffer_1 = entry.buffer;
127 const u32 buffer_2 = entry.secondary_buffer;
128 const u32 offset_1 = entry.offset;
129 const u32 offset_2 = entry.secondary_offset;
130 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
131 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
132 return TextureHandle(handle_1 | handle_2, via_header_index);
133 }
134 }
135 if (entry.is_bindless) {
136 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
137 return TextureHandle(raw, via_header_index);
138 }
139 const u32 buffer = engine.GetBoundBuffer();
140 const u64 offset = (entry.offset + index) * sizeof(u32);
141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
142}
143
144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
145 if (entry.is_buffer) {
146 return ImageViewType::e2D;
147 }
148 switch (entry.type) {
149 case Tegra::Shader::TextureType::Texture1D:
150 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
151 case Tegra::Shader::TextureType::Texture2D:
152 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
153 case Tegra::Shader::TextureType::Texture3D:
154 return ImageViewType::e3D;
155 case Tegra::Shader::TextureType::TextureCube:
156 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
157 }
158 UNREACHABLE();
159 return ImageViewType::e2D;
160}
161
162ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
163 switch (entry.type) {
164 case Tegra::Shader::ImageType::Texture1D:
165 return ImageViewType::e1D;
166 case Tegra::Shader::ImageType::Texture1DArray:
167 return ImageViewType::e1DArray;
168 case Tegra::Shader::ImageType::Texture2D:
169 return ImageViewType::e2D;
170 case Tegra::Shader::ImageType::Texture2DArray:
171 return ImageViewType::e2DArray;
172 case Tegra::Shader::ImageType::Texture3D:
173 return ImageViewType::e3D;
174 case Tegra::Shader::ImageType::TextureBuffer:
175 return ImageViewType::Buffer;
176 }
177 UNREACHABLE();
178 return ImageViewType::e2D;
179}
180
181void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
184 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
185 const ImageViewId image_view_id = *image_view_id_ptr++;
186 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
187 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
188 }
189 for (const auto& entry : entries.samplers) {
190 for (size_t i = 0; i < entry.size; ++i) {
191 const VkSampler sampler = *sampler_ptr++;
192 const ImageViewId image_view_id = *image_view_id_ptr++;
193 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
194 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
195 update_descriptor_queue.AddSampledImage(handle, sampler);
196 }
197 }
198 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
199 const ImageViewId image_view_id = *image_view_id_ptr++;
200 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
201 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
202 }
203 for (const auto& entry : entries.images) {
204 // TODO: Mark as modified
205 const ImageViewId image_view_id = *image_view_id_ptr++;
206 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
207 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
208 update_descriptor_queue.AddImage(handle);
209 }
210}
211
212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, 98DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
213 bool is_indexed) { 99 bool is_indexed) {
214 DrawParams params{ 100 DrawParams params{
@@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
216 .num_instances = is_instanced ? num_instances : 1, 102 .num_instances = is_instanced ? num_instances : 1,
217 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, 103 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
218 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, 104 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
105 .first_index = is_indexed ? regs.index_array.first : 0,
219 .is_indexed = is_indexed, 106 .is_indexed = is_indexed,
220 }; 107 };
221 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { 108 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
@@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
243 blit_image(device, scheduler, state_tracker, descriptor_pool), 130 blit_image(device, scheduler, state_tracker, descriptor_pool),
244 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, 131 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
245 memory_allocator), 132 memory_allocator),
246 texture_cache_runtime{device, scheduler, memory_allocator, 133 render_pass_cache(device), texture_cache_runtime{device, scheduler,
247 staging_pool, blit_image, astc_decoder_pass}, 134 memory_allocator, staging_pool,
135 blit_image, astc_decoder_pass,
136 render_pass_cache},
248 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 137 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
249 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 138 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
250 update_descriptor_queue, descriptor_pool), 139 update_descriptor_queue, descriptor_pool),
251 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 140 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
252 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 141 pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
253 descriptor_pool, update_descriptor_queue), 142 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
143 texture_cache, gpu.ShaderNotify()),
254 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, 144 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
255 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 145 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
256 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 146 wfi_event(device.GetLogical().CreateEvent()) {
257 scheduler.SetQueryCache(query_cache); 147 scheduler.SetQueryCache(query_cache);
258 if (device.UseAsynchronousShaders()) {
259 async_shaders.AllocateWorkers();
260 }
261} 148}
262 149
263RasterizerVulkan::~RasterizerVulkan() = default; 150RasterizerVulkan::~RasterizerVulkan() = default;
@@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
270 157
271 query_cache.UpdateCounters(); 158 query_cache.UpdateCounters();
272 159
273 graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); 160 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
274 161 if (!pipeline) {
275 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
276
277 texture_cache.SynchronizeGraphicsDescriptors();
278 texture_cache.UpdateRenderTargets(false);
279
280 const auto shaders = pipeline_cache.GetShaders();
281 graphics_key.shaders = GetShaderAddresses(shaders);
282
283 SetupShaderDescriptors(shaders, is_indexed);
284
285 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
286 graphics_key.renderpass = framebuffer->RenderPass();
287
288 VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
289 graphics_key, framebuffer->NumColorBuffers(), async_shaders);
290 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
291 // Async graphics pipeline was not ready.
292 return; 162 return;
293 } 163 }
164 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
165 pipeline->Configure(is_indexed);
294 166
295 BeginTransformFeedback(); 167 BeginTransformFeedback();
296 168
297 scheduler.RequestRenderpass(framebuffer);
298 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
299 UpdateDynamicStates(); 169 UpdateDynamicStates();
300 170
301 const auto& regs = maxwell3d.regs; 171 const auto& regs{maxwell3d.regs};
302 const u32 num_instances = maxwell3d.mme_draw.instance_count; 172 const u32 num_instances{maxwell3d.mme_draw.instance_count};
303 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); 173 const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
304 const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); 174 scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
305 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
306 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
307 if (descriptor_set) {
308 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
309 DESCRIPTOR_SET, descriptor_set, nullptr);
310 }
311 if (draw_params.is_indexed) { 175 if (draw_params.is_indexed) {
312 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, 176 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance); 177 draw_params.first_index, draw_params.base_vertex,
178 draw_params.base_instance);
314 } else { 179 } else {
315 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, 180 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
316 draw_params.base_vertex, draw_params.base_instance); 181 draw_params.base_vertex, draw_params.base_instance);
317 } 182 }
318 }); 183 });
319
320 EndTransformFeedback(); 184 EndTransformFeedback();
321} 185}
322 186
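The rewritten Draw above records only the draw parameters; the descriptor set binding previously recorded inline is handled by pipeline->Configure(is_indexed). The new first_index field feeds the firstIndex argument of the indexed draw, which the old path pinned to zero. A sketch of how those parameters map onto the raw Vulkan entry points, assuming the vk::CommandBuffer wrapper forwards its arguments in the same order (the values below are placeholders):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    void RecordDraw(VkCommandBuffer cmdbuf, bool is_indexed) {
        const uint32_t num_vertices = 6; // placeholder values for illustration
        const uint32_t num_instances = 1;
        const uint32_t first_index = 0;
        const uint32_t base_vertex = 0;
        const uint32_t base_instance = 0;
        if (is_indexed) {
            // firstIndex is the new field; vertexOffset takes the base vertex
            vkCmdDrawIndexed(cmdbuf, num_vertices, num_instances, first_index,
                             static_cast<int32_t>(base_vertex), base_instance);
        } else {
            vkCmdDraw(cmdbuf, num_vertices, num_instances, base_vertex, base_instance);
        }
    }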
@@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() {
326 if (!maxwell3d.ShouldExecute()) { 190 if (!maxwell3d.ShouldExecute()) {
327 return; 191 return;
328 } 192 }
193 FlushWork();
329 194
330 query_cache.UpdateCounters(); 195 query_cache.UpdateCounters();
331 196
@@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() {
395 }); 260 });
396} 261}
397 262
398void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 263void RasterizerVulkan::DispatchCompute() {
399 MICROPROFILE_SCOPE(Vulkan_Compute); 264 FlushWork();
400
401 query_cache.UpdateCounters();
402 265
403 const auto& launch_desc = kepler_compute.launch_description; 266 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
404 auto& pipeline = pipeline_cache.GetComputePipeline({ 267 if (!pipeline) {
405 .shader = code_addr, 268 return;
406 .shared_memory_size = launch_desc.shared_alloc, 269 }
407 .workgroup_size{ 270 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
408 launch_desc.block_dim_x, 271 pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
409 launch_desc.block_dim_y,
410 launch_desc.block_dim_z,
411 },
412 });
413 272
414 // Compute dispatches can't be executed inside a renderpass 273 const auto& qmd{kepler_compute.launch_description};
274 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
415 scheduler.RequestOutsideRenderPassOperationContext(); 275 scheduler.RequestOutsideRenderPassOperationContext();
416 276 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
417 image_view_indices.clear();
418 sampler_handles.clear();
419
420 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
421
422 const auto& entries = pipeline.GetEntries();
423 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
424 buffer_cache.UnbindComputeStorageBuffers();
425 u32 ssbo_index = 0;
426 for (const auto& buffer : entries.global_buffers) {
427 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
428 buffer.is_written);
429 ++ssbo_index;
430 }
431 buffer_cache.UpdateComputeBuffers();
432
433 texture_cache.SynchronizeComputeDescriptors();
434
435 SetupComputeUniformTexels(entries);
436 SetupComputeTextures(entries);
437 SetupComputeStorageTexels(entries);
438 SetupComputeImages(entries);
439
440 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
441 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
442
443 update_descriptor_queue.Acquire();
444
445 buffer_cache.BindHostComputeBuffers();
446
447 ImageViewId* image_view_id_ptr = image_view_ids.data();
448 VkSampler* sampler_ptr = sampler_handles.data();
449 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
450 sampler_ptr);
451
452 const VkPipeline pipeline_handle = pipeline.GetHandle();
453 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
454 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
455 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
456 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
457 descriptor_set](vk::CommandBuffer cmdbuf) {
458 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
459 if (descriptor_set) {
460 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
461 DESCRIPTOR_SET, descriptor_set, nullptr);
462 }
463 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
464 });
465} 277}
466 278
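DispatchCompute above likewise delegates resource binding to the compute pipeline's Configure call and only records the dispatch itself, with the grid dimensions taken from the KeplerCompute launch description; the outside-render-pass operation context is requested because Vulkan forbids dispatches inside a render pass. A sketch of the underlying command recording, roughly what the removed inline path did and what the pipeline object now encapsulates (the wrapper is assumed to forward directly to the C API):

    #include <array>
    #include <cstdint>
    #include <vulkan/vulkan.h>

    void RecordDispatch(VkCommandBuffer cmdbuf, VkPipeline pipeline, VkPipelineLayout layout,
                        VkDescriptorSet descriptor_set, const std::array<uint32_t, 3>& dim) {
        vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
        vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 1,
                                &descriptor_set, 0, nullptr);
        vkCmdDispatch(cmdbuf, dim[0], dim[1], dim[2]);
    }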
467void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 279void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
@@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() {
626 438
627void RasterizerVulkan::FragmentBarrier() { 439void RasterizerVulkan::FragmentBarrier() {
628 // We already put barriers when a render pass finishes 440 // We already put barriers when a render pass finishes
441 scheduler.RequestOutsideRenderPassOperationContext();
629} 442}
630 443
631void RasterizerVulkan::TiledCacheBarrier() { 444void RasterizerVulkan::TiledCacheBarrier() {
@@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() {
633} 446}
634 447
635void RasterizerVulkan::FlushCommands() { 448void RasterizerVulkan::FlushCommands() {
636 if (draw_counter > 0) { 449 if (draw_counter == 0) {
637 draw_counter = 0; 450 return;
638 scheduler.Flush();
639 } 451 }
452 draw_counter = 0;
453 scheduler.Flush();
640} 454}
641 455
642void RasterizerVulkan::TickFrame() { 456void RasterizerVulkan::TickFrame() {
@@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
676 if (!image_view) { 490 if (!image_view) {
677 return false; 491 return false;
678 } 492 }
679 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 493 screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
680 screen_info.width = image_view->size.width; 494 screen_info.width = image_view->size.width;
681 screen_info.height = image_view->size.height; 495 screen_info.height = image_view->size.height;
682 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); 496 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
683 return true; 497 return true;
684} 498}
685 499
500void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
501 const VideoCore::DiskResourceLoadCallback& callback) {
502 pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
503}
504
686void RasterizerVulkan::FlushWork() { 505void RasterizerVulkan::FlushWork() {
687 static constexpr u32 DRAWS_TO_DISPATCH = 4096; 506 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
688 507
@@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() {
691 if ((++draw_counter & 7) != 7) { 510 if ((++draw_counter & 7) != 7) {
692 return; 511 return;
693 } 512 }
694
695 if (draw_counter < DRAWS_TO_DISPATCH) { 513 if (draw_counter < DRAWS_TO_DISPATCH) {
696 // Send recorded tasks to the worker thread 514 // Send recorded tasks to the worker thread
697 scheduler.DispatchWork(); 515 scheduler.DispatchWork();
698 return; 516 return;
699 } 517 }
700
701 // Otherwise (every certain number of draws) flush execution. 518 // Otherwise (every certain number of draws) flush execution.
702 // This submits commands to the Vulkan driver. 519 // This submits commands to the Vulkan driver.
703 scheduler.Flush(); 520 scheduler.Flush();
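FlushWork above batches scheduler interaction: the bitmask makes it a no-op on seven of every eight draws, smaller batches only hand recorded tasks to the worker thread, and once DRAWS_TO_DISPATCH draws accumulate, execution is flushed to the driver. A self-contained restatement of that logic, with DispatchWork and Flush as stand-ins for the scheduler calls and the counter reset after the flush assumed (it is not visible in the hunk above):

    #include <cstdint>

    struct WorkBatcher {
        static constexpr std::uint32_t DRAWS_TO_DISPATCH = 4096;
        std::uint32_t draw_counter = 0;

        void FlushWork() {
            if ((++draw_counter & 7) != 7) {
                return; // act only on every 8th draw
            }
            if (draw_counter < DRAWS_TO_DISPATCH) {
                DispatchWork(); // send recorded tasks to the worker thread
                return;
            }
            Flush(); // submit commands to the driver
            draw_counter = 0; // assumed reset after a full flush
        }

        void DispatchWork() {} // placeholder for scheduler.DispatchWork()
        void Flush() {}        // placeholder for scheduler.Flush()
    };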
@@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
716 return buffer_cache.DMACopy(src_address, dest_address, amount); 533 return buffer_cache.DMACopy(src_address, dest_address, amount);
717} 534}
718 535
719void RasterizerVulkan::SetupShaderDescriptors(
720 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
721 image_view_indices.clear();
722 sampler_handles.clear();
723 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
724 Shader* const shader = shaders[stage + 1];
725 if (!shader) {
726 continue;
727 }
728 const ShaderEntries& entries = shader->GetEntries();
729 SetupGraphicsUniformTexels(entries, stage);
730 SetupGraphicsTextures(entries, stage);
731 SetupGraphicsStorageTexels(entries, stage);
732 SetupGraphicsImages(entries, stage);
733
734 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
735 buffer_cache.UnbindGraphicsStorageBuffers(stage);
736 u32 ssbo_index = 0;
737 for (const auto& buffer : entries.global_buffers) {
738 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
739 buffer.cbuf_offset, buffer.is_written);
740 ++ssbo_index;
741 }
742 }
743 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
744 buffer_cache.UpdateGraphicsBuffers(is_indexed);
745 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
746
747 buffer_cache.BindHostGeometryBuffers(is_indexed);
748
749 update_descriptor_queue.Acquire();
750
751 ImageViewId* image_view_id_ptr = image_view_ids.data();
752 VkSampler* sampler_ptr = sampler_handles.data();
753 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
754 // Skip VertexA stage
755 Shader* const shader = shaders[stage + 1];
756 if (!shader) {
757 continue;
758 }
759 buffer_cache.BindHostStageBuffers(stage);
760 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
761 image_view_id_ptr, sampler_ptr);
762 }
763}
764
765void RasterizerVulkan::UpdateDynamicStates() { 536void RasterizerVulkan::UpdateDynamicStates() {
766 auto& regs = maxwell3d.regs; 537 auto& regs = maxwell3d.regs;
767 UpdateViewportsState(regs); 538 UpdateViewportsState(regs);
@@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
770 UpdateBlendConstants(regs); 541 UpdateBlendConstants(regs);
771 UpdateDepthBounds(regs); 542 UpdateDepthBounds(regs);
772 UpdateStencilFaces(regs); 543 UpdateStencilFaces(regs);
544 UpdateLineWidth(regs);
773 if (device.IsExtExtendedDynamicStateSupported()) { 545 if (device.IsExtExtendedDynamicStateSupported()) {
774 UpdateCullMode(regs); 546 UpdateCullMode(regs);
775 UpdateDepthBoundsTestEnable(regs); 547 UpdateDepthBoundsTestEnable(regs);
@@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() {
779 UpdateFrontFace(regs); 551 UpdateFrontFace(regs);
780 UpdateStencilOp(regs); 552 UpdateStencilOp(regs);
781 UpdateStencilTestEnable(regs); 553 UpdateStencilTestEnable(regs);
554 if (device.IsExtVertexInputDynamicStateSupported()) {
555 UpdateVertexInput(regs);
556 }
782 } 557 }
783} 558}
784 559
@@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() {
810 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 585 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
811} 586}
812 587
813void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
814 const auto& regs = maxwell3d.regs;
815 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
816 for (const auto& entry : entries.uniform_texels) {
817 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
818 image_view_indices.push_back(handle.image);
819 }
820}
821
822void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
823 const auto& regs = maxwell3d.regs;
824 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
825 for (const auto& entry : entries.samplers) {
826 for (size_t index = 0; index < entry.size; ++index) {
827 const TextureHandle handle =
828 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
829 image_view_indices.push_back(handle.image);
830
831 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
832 sampler_handles.push_back(sampler->Handle());
833 }
834 }
835}
836
837void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
838 const auto& regs = maxwell3d.regs;
839 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
840 for (const auto& entry : entries.storage_texels) {
841 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
842 image_view_indices.push_back(handle.image);
843 }
844}
845
846void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
847 const auto& regs = maxwell3d.regs;
848 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
849 for (const auto& entry : entries.images) {
850 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
851 image_view_indices.push_back(handle.image);
852 }
853}
854
855void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
856 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
857 for (const auto& entry : entries.uniform_texels) {
858 const TextureHandle handle =
859 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
860 image_view_indices.push_back(handle.image);
861 }
862}
863
864void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
865 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
866 for (const auto& entry : entries.samplers) {
867 for (size_t index = 0; index < entry.size; ++index) {
868 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
869 COMPUTE_SHADER_INDEX, index);
870 image_view_indices.push_back(handle.image);
871
872 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
873 sampler_handles.push_back(sampler->Handle());
874 }
875 }
876}
877
878void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
879 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
880 for (const auto& entry : entries.storage_texels) {
881 const TextureHandle handle =
882 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
883 image_view_indices.push_back(handle.image);
884 }
885}
886
887void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
888 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
889 for (const auto& entry : entries.images) {
890 const TextureHandle handle =
891 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
892 image_view_indices.push_back(handle.image);
893 }
894}
895
896void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 588void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
897 if (!state_tracker.TouchViewports()) { 589 if (!state_tracker.TouchViewports()) {
898 return; 590 return;
@@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
985 } 677 }
986} 678}
987 679
680void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
681 if (!state_tracker.TouchLineWidth()) {
682 return;
683 }
684 const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased;
685 scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
686}
687
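UpdateLineWidth follows the same shape as the other dynamic-state updaters: return early unless the state tracker reports the register as touched, pick between the smooth and aliased width, and record one command. A reduced sketch of that guard, using a plain bool and a command vector as stand-ins for the state tracker and command buffer:

    #include <functional>
    #include <iostream>
    #include <vector>

    struct Regs {
        bool line_smooth_enable = false;
        float line_width_smooth = 1.0f;
        float line_width_aliased = 1.0f;
    };

    class LineWidthUpdater {
    public:
        void MarkDirty() { dirty = true; }

        void Update(const Regs& regs, std::vector<std::function<void()>>& cmdbuf) {
            if (!dirty) {
                return; // Nothing changed since the last recorded command.
            }
            dirty = false;
            const float width =
                regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased;
            cmdbuf.emplace_back([width] { std::cout << "SetLineWidth(" << width << ")\n"; });
        }

    private:
        bool dirty = true;
    };

    int main() {
        Regs regs{.line_smooth_enable = true, .line_width_smooth = 2.0f};
        std::vector<std::function<void()>> cmdbuf;
        LineWidthUpdater updater;
        updater.Update(regs, cmdbuf);
        updater.Update(regs, cmdbuf); // Skipped: not dirty.
        for (auto& cmd : cmdbuf) {
            cmd();
        }
    }
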
988void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { 688void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
989 if (!state_tracker.TouchCullMode()) { 689 if (!state_tracker.TouchCullMode()) {
990 return; 690 return;
@@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
999 if (!state_tracker.TouchDepthBoundsTestEnable()) { 699 if (!state_tracker.TouchDepthBoundsTestEnable()) {
1000 return; 700 return;
1001 } 701 }
702 bool enabled = regs.depth_bounds_enable;
703 if (enabled && !device.IsDepthBoundsSupported()) {
704 LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
705 enabled = false;
706 }
1002 scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { 707 scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
1003 cmdbuf.SetDepthBoundsTestEnableEXT(enable); 708 cmdbuf.SetDepthBoundsTestEnableEXT(enable);
1004 }); 709 });
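The added depth-bounds check is the clamp-and-warn pattern: when the guest enables a feature the host device does not expose, log a warning and force it off. A minimal sketch under that assumption, with a placeholder Device type; the sketch records the clamped value:

    #include <iostream>

    struct Device {
        bool IsDepthBoundsSupported() const { return false; }
    };

    // Returns the value that should actually be recorded into the command buffer.
    bool ClampDepthBoundsEnable(bool requested, const Device& device) {
        if (requested && !device.IsDepthBoundsSupported()) {
            std::cerr << "Depth bounds is enabled but not supported\n";
            return false;
        }
        return requested;
    }

    int main() {
        Device device;
        std::cout << std::boolalpha << ClampDepthBoundsEnable(true, device) << '\n'; // false
    }
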
@@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1086 }); 791 });
1087} 792}
1088 793
794void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
795 auto& dirty{maxwell3d.dirty.flags};
796 if (!dirty[Dirty::VertexInput]) {
797 return;
798 }
799 dirty[Dirty::VertexInput] = false;
800
801 boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
802 boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;
803
804 // There seems to be a bug in Nvidia's driver where updating only higher attributes ends up
805 // generating dirty state. Track the highest dirty attribute and update all attributes until
806 // that one.
807 size_t highest_dirty_attr{};
808 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
809 if (dirty[Dirty::VertexAttribute0 + index]) {
810 highest_dirty_attr = index;
811 }
812 }
813 for (size_t index = 0; index < highest_dirty_attr; ++index) {
814 const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
815 const u32 binding{attribute.buffer};
816 dirty[Dirty::VertexAttribute0 + index] = false;
817 dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
818 if (!attribute.constant) {
819 attributes.push_back({
820 .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
821 .pNext = nullptr,
822 .location = static_cast<u32>(index),
823 .binding = binding,
824 .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size),
825 .offset = attribute.offset,
826 });
827 }
828 }
829 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
830 if (!dirty[Dirty::VertexBinding0 + index]) {
831 continue;
832 }
833 dirty[Dirty::VertexBinding0 + index] = false;
834
835 const u32 binding{static_cast<u32>(index)};
836 const auto& input_binding{regs.vertex_array[binding]};
837 const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)};
838 bindings.push_back({
839 .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
840 .pNext = nullptr,
841 .binding = binding,
842 .stride = input_binding.stride,
843 .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
844 .divisor = is_instanced ? input_binding.divisor : 1,
845 });
846 }
847 scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
848 cmdbuf.SetVertexInputEXT(bindings, attributes);
849 });
850}
851
1089} // namespace Vulkan 852} // namespace Vulkan
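UpdateVertexInput handles the driver issue described in its comment by scanning all attribute dirty bits, remembering the highest dirty index, and rebuilding the attribute descriptions from 0 upwards. A standard-library sketch of that scan; the bitset stands in for the Maxwell dirty flags, and the sketch rebuilds up to and including the highest index, as the comment describes:

    #include <bitset>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    constexpr std::size_t NumVertexAttributes = 32;

    // Returns the indices that must be rebuilt: everything up to the highest dirty attribute.
    std::vector<std::size_t> AttributesToRebuild(std::bitset<NumVertexAttributes>& dirty) {
        std::size_t highest_dirty_attr = 0;
        for (std::size_t index = 0; index < NumVertexAttributes; ++index) {
            if (dirty[index]) {
                highest_dirty_attr = index;
            }
        }
        std::vector<std::size_t> rebuild;
        for (std::size_t index = 0; index <= highest_dirty_attr; ++index) {
            dirty[index] = false;
            rebuild.push_back(index);
        }
        return rebuild;
    }

    int main() {
        std::bitset<NumVertexAttributes> dirty;
        dirty[3] = true;
        dirty[9] = true;
        for (const std::size_t index : AttributesToRebuild(dirty)) {
            std::cout << index << ' '; // Prints 0 through 9.
        }
        std::cout << '\n';
    }
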
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 2065209be..866827247 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,14 +21,13 @@
21#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 22#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 23#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 25#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 29#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 30#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h"
32#include "video_core/vulkan_common/vulkan_memory_allocator.h" 31#include "video_core/vulkan_common/vulkan_memory_allocator.h"
33#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
34 33
@@ -73,7 +72,7 @@ public:
73 72
74 void Draw(bool is_indexed, bool is_instanced) override; 73 void Draw(bool is_indexed, bool is_instanced) override;
75 void Clear() override; 74 void Clear() override;
76 void DispatchCompute(GPUVAddr code_addr) override; 75 void DispatchCompute() override;
77 void ResetCounter(VideoCore::QueryType type) override; 76 void ResetCounter(VideoCore::QueryType type) override;
78 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 77 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
79 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 78 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -102,19 +101,8 @@ public:
102 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 101 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
103 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 102 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
104 u32 pixel_stride) override; 103 u32 pixel_stride) override;
105 104 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
106 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { 105 const VideoCore::DiskResourceLoadCallback& callback) override;
107 return async_shaders;
108 }
109
110 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
111 return async_shaders;
112 }
113
114 /// Maximum supported size that a constbuffer can have in bytes.
115 static constexpr size_t MaxConstbufferSize = 0x10000;
116 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
117 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
118 106
119private: 107private:
120 static constexpr size_t MAX_TEXTURES = 192; 108 static constexpr size_t MAX_TEXTURES = 192;
@@ -125,46 +113,19 @@ private:
125 113
126 void FlushWork(); 114 void FlushWork();
127 115
128 /// Setup descriptors in the graphics pipeline.
129 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
130 bool is_indexed);
131
132 void UpdateDynamicStates(); 116 void UpdateDynamicStates();
133 117
134 void BeginTransformFeedback(); 118 void BeginTransformFeedback();
135 119
136 void EndTransformFeedback(); 120 void EndTransformFeedback();
137 121
138 /// Setup uniform texels in the graphics pipeline.
139 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
140
141 /// Setup textures in the graphics pipeline.
142 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
143
144 /// Setup storage texels in the graphics pipeline.
145 void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
146
147 /// Setup images in the graphics pipeline.
148 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
149
150 /// Setup texel buffers in the compute pipeline.
151 void SetupComputeUniformTexels(const ShaderEntries& entries);
152
153 /// Setup textures in the compute pipeline.
154 void SetupComputeTextures(const ShaderEntries& entries);
155
156 /// Setup storage texels in the compute pipeline.
157 void SetupComputeStorageTexels(const ShaderEntries& entries);
158
159 /// Setup images in the compute pipeline.
160 void SetupComputeImages(const ShaderEntries& entries);
161
162 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 122 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
163 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 123 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
164 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 124 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
165 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); 125 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
166 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); 126 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
167 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); 127 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
128 void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
168 129
169 void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); 130 void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
170 void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 131 void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -175,6 +136,8 @@ private:
175 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 136 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
176 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 137 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
177 138
139 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
140
178 Tegra::GPU& gpu; 141 Tegra::GPU& gpu;
179 Tegra::MemoryManager& gpu_memory; 142 Tegra::MemoryManager& gpu_memory;
180 Tegra::Engines::Maxwell3D& maxwell3d; 143 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -187,24 +150,22 @@ private:
187 VKScheduler& scheduler; 150 VKScheduler& scheduler;
188 151
189 StagingBufferPool staging_pool; 152 StagingBufferPool staging_pool;
190 VKDescriptorPool descriptor_pool; 153 DescriptorPool descriptor_pool;
191 VKUpdateDescriptorQueue update_descriptor_queue; 154 VKUpdateDescriptorQueue update_descriptor_queue;
192 BlitImageHelper blit_image; 155 BlitImageHelper blit_image;
193 ASTCDecoderPass astc_decoder_pass; 156 ASTCDecoderPass astc_decoder_pass;
194 157 RenderPassCache render_pass_cache;
195 GraphicsPipelineCacheKey graphics_key;
196 158
197 TextureCacheRuntime texture_cache_runtime; 159 TextureCacheRuntime texture_cache_runtime;
198 TextureCache texture_cache; 160 TextureCache texture_cache;
199 BufferCacheRuntime buffer_cache_runtime; 161 BufferCacheRuntime buffer_cache_runtime;
200 BufferCache buffer_cache; 162 BufferCache buffer_cache;
201 VKPipelineCache pipeline_cache; 163 PipelineCache pipeline_cache;
202 VKQueryCache query_cache; 164 VKQueryCache query_cache;
203 AccelerateDMA accelerate_dma; 165 AccelerateDMA accelerate_dma;
204 VKFenceManager fence_manager; 166 VKFenceManager fence_manager;
205 167
206 vk::Event wfi_event; 168 vk::Event wfi_event;
207 VideoCommon::Shader::AsyncShaders async_shaders;
208 169
209 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 170 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
210 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 171 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 000000000..451ffe019
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6
7#include <boost/container/static_vector.hpp>
8
9#include "video_core/renderer_vulkan/maxwell_to_vk.h"
10#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
11#include "video_core/surface.h"
12#include "video_core/vulkan_common/vulkan_device.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h"
14
15namespace Vulkan {
16namespace {
17using VideoCore::Surface::PixelFormat;
18
19VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
20 VkSampleCountFlagBits samples) {
21 using MaxwellToVK::SurfaceFormat;
22 return {
23 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
24 .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
25 .samples = samples,
26 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
27 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
28 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
29 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
30 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
31 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
32 };
33}
34} // Anonymous namespace
35
36RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
37
38VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
39 std::lock_guard lock{mutex};
40 const auto [pair, is_new] = cache.try_emplace(key);
41 if (!is_new) {
42 return *pair->second;
43 }
44 boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
45 std::array<VkAttachmentReference, 8> references{};
46 u32 num_attachments{};
47 u32 num_colors{};
48 for (size_t index = 0; index < key.color_formats.size(); ++index) {
49 const PixelFormat format{key.color_formats[index]};
50 const bool is_valid{format != PixelFormat::Invalid};
51 references[index] = VkAttachmentReference{
52 .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
53 .layout = VK_IMAGE_LAYOUT_GENERAL,
54 };
55 if (is_valid) {
56 descriptions.push_back(AttachmentDescription(*device, format, key.samples));
57 num_attachments = static_cast<u32>(index + 1);
58 ++num_colors;
59 }
60 }
61 const bool has_depth{key.depth_format != PixelFormat::Invalid};
62 VkAttachmentReference depth_reference{};
63 if (key.depth_format != PixelFormat::Invalid) {
64 depth_reference = VkAttachmentReference{
65 .attachment = num_colors,
66 .layout = VK_IMAGE_LAYOUT_GENERAL,
67 };
68 descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
69 }
70 const VkSubpassDescription subpass{
71 .flags = 0,
72 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
73 .inputAttachmentCount = 0,
74 .pInputAttachments = nullptr,
75 .colorAttachmentCount = num_attachments,
76 .pColorAttachments = references.data(),
77 .pResolveAttachments = nullptr,
78 .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
79 .preserveAttachmentCount = 0,
80 .pPreserveAttachments = nullptr,
81 };
82 pair->second = device->GetLogical().CreateRenderPass({
83 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
84 .pNext = nullptr,
85 .flags = 0,
86 .attachmentCount = static_cast<u32>(descriptions.size()),
87 .pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
88 .subpassCount = 1,
89 .pSubpasses = &subpass,
90 .dependencyCount = 0,
91 .pDependencies = nullptr,
92 });
93 return *pair->second;
94}
95
96} // namespace Vulkan
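RenderPassCache::Get leans on unordered_map::try_emplace under a mutex: a single lookup either returns the cached render pass or reserves the slot that the freshly built one is stored into. The same memoization shape reduced to the standard library, with illustrative key and value types:

    #include <iostream>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    class ExpensiveCache {
    public:
        const std::string& Get(int key) {
            std::scoped_lock lock{mutex};
            const auto [it, is_new] = cache.try_emplace(key);
            if (!is_new) {
                return it->second; // Cache hit: reuse the previously built value.
            }
            it->second = Build(key); // Cache miss: build once and store in the reserved slot.
            return it->second;
        }

    private:
        static std::string Build(int key) { return "value-" + std::to_string(key); }

        std::unordered_map<int, std::string> cache;
        std::mutex mutex;
    };

    int main() {
        ExpensiveCache cache;
        std::cout << cache.Get(7) << '\n';
        std::cout << cache.Get(7) << '\n'; // Second call hits the cache.
    }
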
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..eaa0ed775
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <unordered_map>
9
10#include "video_core/surface.h"
11#include "video_core/vulkan_common/vulkan_wrapper.h"
12
13namespace Vulkan {
14
15struct RenderPassKey {
16 auto operator<=>(const RenderPassKey&) const noexcept = default;
17
18 std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
19 VideoCore::Surface::PixelFormat depth_format;
20 VkSampleCountFlagBits samples;
21};
22
23} // namespace Vulkan
24
25namespace std {
26template <>
27struct hash<Vulkan::RenderPassKey> {
28 [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
29 size_t value = static_cast<size_t>(key.depth_format) << 48;
30 value ^= static_cast<size_t>(key.samples) << 52;
31 for (size_t i = 0; i < key.color_formats.size(); ++i) {
32 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
33 }
34 return value;
35 }
36};
37} // namespace std
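The hash above packs the whole key into one size_t: depth format and sample count take the high bits while each color format is shifted by 6 bits per slot. A hedged sketch of the same idea for an arbitrary small key, assuming a 64-bit size_t; the field widths and names here are made up for the example:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <unordered_map>

    struct AttachmentKey {
        std::array<std::uint8_t, 8> color_formats{}; // Each format fits in 6 bits for this sketch.
        std::uint8_t depth_format = 0;
        std::uint8_t samples = 1;

        bool operator==(const AttachmentKey&) const noexcept = default;
    };

    namespace std {
    template <>
    struct hash<AttachmentKey> {
        size_t operator()(const AttachmentKey& key) const noexcept {
            // Pack every field into disjoint bit ranges of a 64-bit value.
            size_t value = static_cast<size_t>(key.depth_format) << 48;
            value ^= static_cast<size_t>(key.samples) << 52;
            for (size_t i = 0; i < key.color_formats.size(); ++i) {
                value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
            }
            return value;
        }
    };
    } // namespace std

    int main() {
        std::unordered_map<AttachmentKey, int> cache;
        cache[AttachmentKey{}] = 1;
        std::cout << cache.size() << '\n';
    }
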
38
39namespace Vulkan {
40
41class Device;
42
43class RenderPassCache {
44public:
45 explicit RenderPassCache(const Device& device_);
46
47 VkRenderPass Get(const RenderPassKey& key);
48
49private:
50 const Device* device{};
51 std::unordered_map<RenderPassKey, vk::RenderPass> cache;
52 std::mutex mutex;
53};
54
55} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) 12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
13 : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} 13 : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
14
15ResourcePool::~ResourcePool() = default;
16 14
17size_t ResourcePool::CommitResource() { 15size_t ResourcePool::CommitResource() {
18 // Refresh semaphore to query updated results 16 // Refresh semaphore to query updated results
19 master_semaphore.Refresh(); 17 master_semaphore->Refresh();
20 const u64 gpu_tick = master_semaphore.KnownGpuTick(); 18 const u64 gpu_tick = master_semaphore->KnownGpuTick();
21 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { 19 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
22 for (size_t iterator = begin; iterator < end; ++iterator) { 20 for (size_t iterator = begin; iterator < end; ++iterator) {
23 if (gpu_tick >= ticks[iterator]) { 21 if (gpu_tick >= ticks[iterator]) {
24 ticks[iterator] = master_semaphore.CurrentTick(); 22 ticks[iterator] = master_semaphore->CurrentTick();
25 return iterator; 23 return iterator;
26 } 24 }
27 } 25 }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
36 // Both searches failed, the pool is full; handle it. 34 // Both searches failed, the pool is full; handle it.
37 const size_t free_resource = ManageOverflow(); 35 const size_t free_resource = ManageOverflow();
38 36
39 ticks[free_resource] = master_semaphore.CurrentTick(); 37 ticks[free_resource] = master_semaphore->CurrentTick();
40 found = free_resource; 38 found = free_resource;
41 } 39 }
42 } 40 }
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
18 */ 18 */
19class ResourcePool { 19class ResourcePool {
20public: 20public:
21 explicit ResourcePool() = default;
21 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); 22 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
22 virtual ~ResourcePool(); 23
24 virtual ~ResourcePool() = default;
25
26 ResourcePool& operator=(ResourcePool&&) noexcept = default;
27 ResourcePool(ResourcePool&&) noexcept = default;
28
29 ResourcePool& operator=(const ResourcePool&) = default;
30 ResourcePool(const ResourcePool&) = default;
23 31
24protected: 32protected:
25 size_t CommitResource(); 33 size_t CommitResource();
@@ -34,7 +42,7 @@ private:
34 /// Allocates a new page of resources. 42 /// Allocates a new page of resources.
35 void Grow(); 43 void Grow();
36 44
37 MasterSemaphore& master_semaphore; 45 MasterSemaphore* master_semaphore{};
38 size_t grow_step = 0; ///< Number of new resources created after an overflow 46 size_t grow_step = 0; ///< Number of new resources created after an overflow
39 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found 47 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
40 std::vector<u64> ticks; ///< Ticks for each resource 48 std::vector<u64> ticks; ///< Ticks for each resource
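Turning the MasterSemaphore reference into a pointer is what lets ResourcePool gain a default constructor and defaulted copy/move assignment; a reference member suppresses the implicitly generated assignment operators. A small sketch of the difference, with illustrative types:

    #include <cassert>

    struct Semaphore {
        unsigned long long tick = 0;
    };

    // With a pointer member the compiler-generated special members all work,
    // including assignment, which a reference member would forbid.
    class Pool {
    public:
        Pool() = default;
        explicit Pool(Semaphore& semaphore_) : semaphore{&semaphore_} {}

        unsigned long long CurrentTick() const { return semaphore ? semaphore->tick : 0; }

    private:
        Semaphore* semaphore = nullptr;
    };

    int main() {
        Semaphore semaphore{.tick = 42};
        Pool pool;              // Default-constructible now.
        pool = Pool{semaphore}; // Assignable now: this is what a reference member disallowed.
        assert(pool.CurrentTick() == 42);
    }
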
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f35c120b0..4840962de 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
31 command->~Command(); 31 command->~Command();
32 command = next; 32 command = next;
33 } 33 }
34 34 submit = false;
35 command_offset = 0; 35 command_offset = 0;
36 first = nullptr; 36 first = nullptr;
37 last = nullptr; 37 last = nullptr;
@@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
42 master_semaphore{std::make_unique<MasterSemaphore>(device)}, 42 master_semaphore{std::make_unique<MasterSemaphore>(device)},
43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { 43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
44 AcquireNewChunk(); 44 AcquireNewChunk();
45 AllocateNewContext(); 45 AllocateWorkerCommandBuffer();
46 worker_thread = std::thread(&VKScheduler::WorkerThread, this); 46 worker_thread = std::thread(&VKScheduler::WorkerThread, this);
47} 47}
48 48
49VKScheduler::~VKScheduler() { 49VKScheduler::~VKScheduler() {
50 quit = true; 50 {
51 cv.notify_all(); 51 std::lock_guard lock{work_mutex};
52 quit = true;
53 }
54 work_cv.notify_all();
52 worker_thread.join(); 55 worker_thread.join();
53} 56}
54 57
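Setting quit while holding the same mutex the worker waits on, and only then notifying, guarantees the worker cannot test the flag and block just before the notification lands. A standard sketch of that shutdown handshake:

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>

    std::mutex work_mutex;
    std::condition_variable work_cv;
    bool quit = false;

    void Worker() {
        std::unique_lock lock{work_mutex};
        // Wakes up when work arrives or when shutdown was requested.
        work_cv.wait(lock, [] { return quit; });
        std::cout << "worker exiting\n";
    }

    int main() {
        std::thread worker{Worker};
        {
            // Flip the flag while holding the lock so the worker cannot check it
            // and then block just before the notification arrives.
            std::lock_guard lock{work_mutex};
            quit = true;
        }
        work_cv.notify_all();
        worker.join();
    }
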
@@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) {
60void VKScheduler::Finish(VkSemaphore semaphore) { 63void VKScheduler::Finish(VkSemaphore semaphore) {
61 const u64 presubmit_tick = CurrentTick(); 64 const u64 presubmit_tick = CurrentTick();
62 SubmitExecution(semaphore); 65 SubmitExecution(semaphore);
66 WaitWorker();
63 Wait(presubmit_tick); 67 Wait(presubmit_tick);
64 AllocateNewContext(); 68 AllocateNewContext();
65} 69}
@@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() {
68 MICROPROFILE_SCOPE(Vulkan_WaitForWorker); 72 MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
69 DispatchWork(); 73 DispatchWork();
70 74
71 bool finished = false; 75 std::unique_lock lock{work_mutex};
72 do { 76 wait_cv.wait(lock, [this] { return work_queue.empty(); });
73 cv.notify_all();
74 std::unique_lock lock{mutex};
75 finished = chunk_queue.Empty();
76 } while (!finished);
77} 77}
78 78
79void VKScheduler::DispatchWork() { 79void VKScheduler::DispatchWork() {
80 if (chunk->Empty()) { 80 if (chunk->Empty()) {
81 return; 81 return;
82 } 82 }
83 chunk_queue.Push(std::move(chunk)); 83 {
84 cv.notify_all(); 84 std::lock_guard lock{work_mutex};
85 work_queue.push(std::move(chunk));
86 }
87 work_cv.notify_one();
85 AcquireNewChunk(); 88 AcquireNewChunk();
86} 89}
87 90
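DispatchWork and WaitWorker are the producer half of a mutex-plus-two-condition-variables queue: work_cv wakes the worker when a chunk is pushed, wait_cv wakes waiters once the queue drains. A compact standalone version; the names mirror the diff but the chunk type is simplified to a string:

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <string>
    #include <thread>

    std::mutex work_mutex;
    std::condition_variable work_cv; // Signals the worker that work arrived (or quit).
    std::condition_variable wait_cv; // Signals producers that the queue drained.
    std::queue<std::string> work_queue;
    bool quit = false;

    void DispatchWork(std::string chunk) {
        {
            std::lock_guard lock{work_mutex};
            work_queue.push(std::move(chunk));
        }
        work_cv.notify_one();
    }

    void WaitWorker() {
        std::unique_lock lock{work_mutex};
        wait_cv.wait(lock, [] { return work_queue.empty(); });
    }

    void WorkerThread() {
        while (true) {
            std::string work;
            {
                std::unique_lock lock{work_mutex};
                if (work_queue.empty()) {
                    wait_cv.notify_all();
                }
                work_cv.wait(lock, [] { return !work_queue.empty() || quit; });
                if (quit) {
                    return;
                }
                work = std::move(work_queue.front());
                work_queue.pop();
            }
            std::cout << "executed " << work << '\n';
        }
    }

    int main() {
        std::thread worker{WorkerThread};
        DispatchWork("chunk 0");
        DispatchWork("chunk 1");
        WaitWorker(); // Blocks until the queue has drained.
        {
            std::lock_guard lock{work_mutex};
            quit = true;
        }
        work_cv.notify_all();
        worker.join();
    }
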
@@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
124 EndRenderPass(); 127 EndRenderPass();
125} 128}
126 129
127void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { 130bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
128 if (state.graphics_pipeline == pipeline) { 131 if (state.graphics_pipeline == pipeline) {
129 return; 132 return false;
130 } 133 }
131 state.graphics_pipeline = pipeline; 134 state.graphics_pipeline = pipeline;
132 Record([pipeline](vk::CommandBuffer cmdbuf) { 135 return true;
133 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
134 });
135} 136}
136 137
137void VKScheduler::WorkerThread() { 138void VKScheduler::WorkerThread() {
138 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 139 Common::SetCurrentThreadName("yuzu:VulkanWorker");
139 std::unique_lock lock{mutex};
140 do { 140 do {
141 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); 141 if (work_queue.empty()) {
142 if (quit) { 142 wait_cv.notify_all();
143 continue; 143 }
144 std::unique_ptr<CommandChunk> work;
145 {
146 std::unique_lock lock{work_mutex};
147 work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
148 if (quit) {
149 continue;
150 }
151 work = std::move(work_queue.front());
152 work_queue.pop();
153 }
154 const bool has_submit = work->HasSubmit();
155 work->ExecuteAll(current_cmdbuf);
156 if (has_submit) {
157 AllocateWorkerCommandBuffer();
144 } 158 }
145 auto extracted_chunk = std::move(chunk_queue.Front()); 159 std::lock_guard reserve_lock{reserve_mutex};
146 chunk_queue.Pop(); 160 chunk_reserve.push_back(std::move(work));
147 extracted_chunk->ExecuteAll(current_cmdbuf);
148 chunk_reserve.Push(std::move(extracted_chunk));
149 } while (!quit); 161 } while (!quit);
150} 162}
151 163
164void VKScheduler::AllocateWorkerCommandBuffer() {
165 current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
166 current_cmdbuf.Begin({
167 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
168 .pNext = nullptr,
169 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
170 .pInheritanceInfo = nullptr,
171 });
172}
173
152void VKScheduler::SubmitExecution(VkSemaphore semaphore) { 174void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
153 EndPendingOperations(); 175 EndPendingOperations();
154 InvalidateState(); 176 InvalidateState();
155 WaitWorker();
156 177
157 std::unique_lock lock{mutex}; 178 const u64 signal_value = master_semaphore->NextTick();
179 Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
180 cmdbuf.End();
158 181
159 current_cmdbuf.End(); 182 const u32 num_signal_semaphores = semaphore ? 2U : 1U;
160 183
161 const VkSemaphore timeline_semaphore = master_semaphore->Handle(); 184 const u64 wait_value = signal_value - 1;
162 const u32 num_signal_semaphores = semaphore ? 2U : 1U; 185 const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
163 186
164 const u64 signal_value = master_semaphore->CurrentTick(); 187 const VkSemaphore timeline_semaphore = master_semaphore->Handle();
165 const u64 wait_value = signal_value - 1; 188 const std::array signal_values{signal_value, u64(0)};
166 const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 189 const std::array signal_semaphores{timeline_semaphore, semaphore};
167 190
168 master_semaphore->NextTick(); 191 const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
169 192 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
170 const std::array signal_values{signal_value, u64(0)}; 193 .pNext = nullptr,
171 const std::array signal_semaphores{timeline_semaphore, semaphore}; 194 .waitSemaphoreValueCount = 1,
172 195 .pWaitSemaphoreValues = &wait_value,
173 const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ 196 .signalSemaphoreValueCount = num_signal_semaphores,
174 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, 197 .pSignalSemaphoreValues = signal_values.data(),
175 .pNext = nullptr, 198 };
176 .waitSemaphoreValueCount = 1, 199 const VkSubmitInfo submit_info{
177 .pWaitSemaphoreValues = &wait_value, 200 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
178 .signalSemaphoreValueCount = num_signal_semaphores, 201 .pNext = &timeline_si,
179 .pSignalSemaphoreValues = signal_values.data(), 202 .waitSemaphoreCount = 1,
180 }; 203 .pWaitSemaphores = &timeline_semaphore,
181 const VkSubmitInfo submit_info{ 204 .pWaitDstStageMask = &wait_stage_mask,
182 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 205 .commandBufferCount = 1,
183 .pNext = &timeline_si, 206 .pCommandBuffers = cmdbuf.address(),
184 .waitSemaphoreCount = 1, 207 .signalSemaphoreCount = num_signal_semaphores,
185 .pWaitSemaphores = &timeline_semaphore, 208 .pSignalSemaphores = signal_semaphores.data(),
186 .pWaitDstStageMask = &wait_stage_mask, 209 };
187 .commandBufferCount = 1, 210 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
188 .pCommandBuffers = current_cmdbuf.address(), 211 case VK_SUCCESS:
189 .signalSemaphoreCount = num_signal_semaphores, 212 break;
190 .pSignalSemaphores = signal_semaphores.data(), 213 case VK_ERROR_DEVICE_LOST:
191 }; 214 device.ReportLoss();
192 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { 215 [[fallthrough]];
193 case VK_SUCCESS: 216 default:
194 break; 217 vk::Check(result);
195 case VK_ERROR_DEVICE_LOST: 218 }
196 device.ReportLoss(); 219 });
197 [[fallthrough]]; 220 chunk->MarkSubmit();
198 default: 221 DispatchWork();
199 vk::Check(result);
200 }
201} 222}
202 223
203void VKScheduler::AllocateNewContext() { 224void VKScheduler::AllocateNewContext() {
204 std::unique_lock lock{mutex};
205
206 current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
207 current_cmdbuf.Begin({
208 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
209 .pNext = nullptr,
210 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
211 .pInheritanceInfo = nullptr,
212 });
213
214 // Enable counters once again. These are disabled when a command buffer is finished. 225 // Enable counters once again. These are disabled when a command buffer is finished.
215 if (query_cache) { 226 if (query_cache) {
216 query_cache->UpdateCounters(); 227 query_cache->UpdateCounters();
@@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() {
265} 276}
266 277
267void VKScheduler::AcquireNewChunk() { 278void VKScheduler::AcquireNewChunk() {
268 if (chunk_reserve.Empty()) { 279 std::lock_guard lock{reserve_mutex};
280 if (chunk_reserve.empty()) {
269 chunk = std::make_unique<CommandChunk>(); 281 chunk = std::make_unique<CommandChunk>();
270 return; 282 return;
271 } 283 }
272 chunk = std::move(chunk_reserve.Front()); 284 chunk = std::move(chunk_reserve.back());
273 chunk_reserve.Pop(); 285 chunk_reserve.pop_back();
274} 286}
275 287
276} // namespace Vulkan 288} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3ce48e9d2..cf39a2363 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -8,12 +8,12 @@
8#include <condition_variable> 8#include <condition_variable>
9#include <cstddef> 9#include <cstddef>
10#include <memory> 10#include <memory>
11#include <stack>
12#include <thread> 11#include <thread>
13#include <utility> 12#include <utility>
13#include <queue>
14
14#include "common/alignment.h" 15#include "common/alignment.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "common/threadsafe_queue.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h" 17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
19 19
@@ -22,6 +22,7 @@ namespace Vulkan {
22class CommandPool; 22class CommandPool;
23class Device; 23class Device;
24class Framebuffer; 24class Framebuffer;
25class GraphicsPipeline;
25class StateTracker; 26class StateTracker;
26class VKQueryCache; 27class VKQueryCache;
27 28
@@ -52,8 +53,8 @@ public:
52 /// of a renderpass. 53 /// of a renderpass.
53 void RequestOutsideRenderPassOperationContext(); 54 void RequestOutsideRenderPassOperationContext();
54 55
55 /// Binds a pipeline to the current execution context. 56 /// Update the pipeline to the current execution context.
56 void BindGraphicsPipeline(VkPipeline pipeline); 57 bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
57 58
58 /// Invalidates current command buffer state except for render passes 59 /// Invalidates current command buffer state except for render passes
59 void InvalidateState(); 60 void InvalidateState();
@@ -85,6 +86,10 @@ public:
85 86
86 /// Waits for the given tick to trigger on the GPU. 87 /// Waits for the given tick to trigger on the GPU.
87 void Wait(u64 tick) { 88 void Wait(u64 tick) {
89 if (tick >= master_semaphore->CurrentTick()) {
90 // Make sure we are not waiting for the current tick without signalling
91 Flush();
92 }
88 master_semaphore->Wait(tick); 93 master_semaphore->Wait(tick);
89 } 94 }
90 95
@@ -154,15 +159,24 @@ private:
154 return true; 159 return true;
155 } 160 }
156 161
162 void MarkSubmit() {
163 submit = true;
164 }
165
157 bool Empty() const { 166 bool Empty() const {
158 return command_offset == 0; 167 return command_offset == 0;
159 } 168 }
160 169
170 bool HasSubmit() const {
171 return submit;
172 }
173
161 private: 174 private:
162 Command* first = nullptr; 175 Command* first = nullptr;
163 Command* last = nullptr; 176 Command* last = nullptr;
164 177
165 size_t command_offset = 0; 178 size_t command_offset = 0;
179 bool submit = false;
166 alignas(std::max_align_t) std::array<u8, 0x8000> data{}; 180 alignas(std::max_align_t) std::array<u8, 0x8000> data{};
167 }; 181 };
168 182
@@ -170,11 +184,13 @@ private:
170 VkRenderPass renderpass = nullptr; 184 VkRenderPass renderpass = nullptr;
171 VkFramebuffer framebuffer = nullptr; 185 VkFramebuffer framebuffer = nullptr;
172 VkExtent2D render_area = {0, 0}; 186 VkExtent2D render_area = {0, 0};
173 VkPipeline graphics_pipeline = nullptr; 187 GraphicsPipeline* graphics_pipeline = nullptr;
174 }; 188 };
175 189
176 void WorkerThread(); 190 void WorkerThread();
177 191
192 void AllocateWorkerCommandBuffer();
193
178 void SubmitExecution(VkSemaphore semaphore); 194 void SubmitExecution(VkSemaphore semaphore);
179 195
180 void AllocateNewContext(); 196 void AllocateNewContext();
@@ -204,11 +220,13 @@ private:
204 std::array<VkImage, 9> renderpass_images{}; 220 std::array<VkImage, 9> renderpass_images{};
205 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; 221 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
206 222
207 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; 223 std::queue<std::unique_ptr<CommandChunk>> work_queue;
208 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 224 std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
209 std::mutex mutex; 225 std::mutex reserve_mutex;
210 std::condition_variable cv; 226 std::mutex work_mutex;
211 bool quit = false; 227 std::condition_variable work_cv;
228 std::condition_variable wait_cv;
229 std::atomic_bool quit{};
212}; 230};
213 231
214} // namespace Vulkan 232} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
deleted file mode 100644
index c6846d886..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ /dev/null
@@ -1,3166 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <functional>
6#include <limits>
7#include <map>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include <fmt/format.h>
14
15#include <sirit/sirit.h>
16
17#include "common/alignment.h"
18#include "common/assert.h"
19#include "common/common_types.h"
20#include "common/logging/log.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/engines/shader_bytecode.h"
23#include "video_core/engines/shader_header.h"
24#include "video_core/engines/shader_type.h"
25#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
26#include "video_core/shader/node.h"
27#include "video_core/shader/shader_ir.h"
28#include "video_core/shader/transform_feedback.h"
29#include "video_core/vulkan_common/vulkan_device.h"
30
31namespace Vulkan {
32
33namespace {
34
35using Sirit::Id;
36using Tegra::Engines::ShaderType;
37using Tegra::Shader::Attribute;
38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register;
40using namespace VideoCommon::Shader;
41
42using Maxwell = Tegra::Engines::Maxwell3D::Regs;
43using Operation = const OperationNode&;
44
45class ASTDecompiler;
46class ExprDecompiler;
47
48// TODO(Rodrigo): Use rasterizer's value
49constexpr u32 MaxConstBufferFloats = 0x4000;
50constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4;
51
52constexpr u32 NumInputPatches = 32; // This value seems to be the standard
53
54enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
55
56class Expression final {
57public:
58 Expression(Id id_, Type type_) : id{id_}, type{type_} {
59 ASSERT(type_ != Type::Void);
60 }
61 Expression() : type{Type::Void} {}
62
63 Id id{};
64 Type type{};
65};
66static_assert(std::is_standard_layout_v<Expression>);
67
68struct TexelBuffer {
69 Id image_type{};
70 Id image{};
71};
72
73struct SampledImage {
74 Id image_type{};
75 Id sampler_type{};
76 Id sampler_pointer_type{};
77 Id variable{};
78};
79
80struct StorageImage {
81 Id image_type{};
82 Id image{};
83};
84
85struct AttributeType {
86 Type type;
87 Id scalar;
88 Id vector;
89};
90
91struct VertexIndices {
92 std::optional<u32> position;
93 std::optional<u32> layer;
94 std::optional<u32> viewport;
95 std::optional<u32> point_size;
96 std::optional<u32> clip_distances;
97};
98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
105spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
106 ASSERT(!sampler.is_buffer);
107 switch (sampler.type) {
108 case Tegra::Shader::TextureType::Texture1D:
109 return spv::Dim::Dim1D;
110 case Tegra::Shader::TextureType::Texture2D:
111 return spv::Dim::Dim2D;
112 case Tegra::Shader::TextureType::Texture3D:
113 return spv::Dim::Dim3D;
114 case Tegra::Shader::TextureType::TextureCube:
115 return spv::Dim::Cube;
116 default:
117 UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
118 return spv::Dim::Dim2D;
119 }
120}
121
122std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
123 switch (image.type) {
124 case Tegra::Shader::ImageType::Texture1D:
125 return {spv::Dim::Dim1D, false};
126 case Tegra::Shader::ImageType::TextureBuffer:
127 return {spv::Dim::Buffer, false};
128 case Tegra::Shader::ImageType::Texture1DArray:
129 return {spv::Dim::Dim1D, true};
130 case Tegra::Shader::ImageType::Texture2D:
131 return {spv::Dim::Dim2D, false};
132 case Tegra::Shader::ImageType::Texture2DArray:
133 return {spv::Dim::Dim2D, true};
134 case Tegra::Shader::ImageType::Texture3D:
135 return {spv::Dim::Dim3D, false};
136 default:
137 UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
138 return {spv::Dim::Dim2D, false};
139 }
140}
141
142/// Returns the number of vertices present in a primitive topology.
143u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) {
144 switch (primitive_topology) {
145 case Maxwell::PrimitiveTopology::Points:
146 return 1;
147 case Maxwell::PrimitiveTopology::Lines:
148 case Maxwell::PrimitiveTopology::LineLoop:
149 case Maxwell::PrimitiveTopology::LineStrip:
150 return 2;
151 case Maxwell::PrimitiveTopology::Triangles:
152 case Maxwell::PrimitiveTopology::TriangleStrip:
153 case Maxwell::PrimitiveTopology::TriangleFan:
154 return 3;
155 case Maxwell::PrimitiveTopology::LinesAdjacency:
156 case Maxwell::PrimitiveTopology::LineStripAdjacency:
157 return 4;
158 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
159 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
160 return 6;
161 case Maxwell::PrimitiveTopology::Quads:
162 UNIMPLEMENTED_MSG("Quads");
163 return 3;
164 case Maxwell::PrimitiveTopology::QuadStrip:
165 UNIMPLEMENTED_MSG("QuadStrip");
166 return 3;
167 case Maxwell::PrimitiveTopology::Polygon:
168 UNIMPLEMENTED_MSG("Polygon");
169 return 3;
170 case Maxwell::PrimitiveTopology::Patches:
171 UNIMPLEMENTED_MSG("Patches");
172 return 3;
173 default:
174 UNREACHABLE();
175 return 3;
176 }
177}
178
179spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) {
180 switch (primitive) {
181 case Maxwell::TessellationPrimitive::Isolines:
182 return spv::ExecutionMode::Isolines;
183 case Maxwell::TessellationPrimitive::Triangles:
184 return spv::ExecutionMode::Triangles;
185 case Maxwell::TessellationPrimitive::Quads:
186 return spv::ExecutionMode::Quads;
187 }
188 UNREACHABLE();
189 return spv::ExecutionMode::Triangles;
190}
191
192spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) {
193 switch (spacing) {
194 case Maxwell::TessellationSpacing::Equal:
195 return spv::ExecutionMode::SpacingEqual;
196 case Maxwell::TessellationSpacing::FractionalOdd:
197 return spv::ExecutionMode::SpacingFractionalOdd;
198 case Maxwell::TessellationSpacing::FractionalEven:
199 return spv::ExecutionMode::SpacingFractionalEven;
200 }
201 UNREACHABLE();
202 return spv::ExecutionMode::SpacingEqual;
203}
204
205spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) {
206 switch (input_topology) {
207 case Maxwell::PrimitiveTopology::Points:
208 return spv::ExecutionMode::InputPoints;
209 case Maxwell::PrimitiveTopology::Lines:
210 case Maxwell::PrimitiveTopology::LineLoop:
211 case Maxwell::PrimitiveTopology::LineStrip:
212 return spv::ExecutionMode::InputLines;
213 case Maxwell::PrimitiveTopology::Triangles:
214 case Maxwell::PrimitiveTopology::TriangleStrip:
215 case Maxwell::PrimitiveTopology::TriangleFan:
216 return spv::ExecutionMode::Triangles;
217 case Maxwell::PrimitiveTopology::LinesAdjacency:
218 case Maxwell::PrimitiveTopology::LineStripAdjacency:
219 return spv::ExecutionMode::InputLinesAdjacency;
220 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
221 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
222 return spv::ExecutionMode::InputTrianglesAdjacency;
223 case Maxwell::PrimitiveTopology::Quads:
224 UNIMPLEMENTED_MSG("Quads");
225 return spv::ExecutionMode::Triangles;
226 case Maxwell::PrimitiveTopology::QuadStrip:
227 UNIMPLEMENTED_MSG("QuadStrip");
228 return spv::ExecutionMode::Triangles;
229 case Maxwell::PrimitiveTopology::Polygon:
230 UNIMPLEMENTED_MSG("Polygon");
231 return spv::ExecutionMode::Triangles;
232 case Maxwell::PrimitiveTopology::Patches:
233 UNIMPLEMENTED_MSG("Patches");
234 return spv::ExecutionMode::Triangles;
235 }
236 UNREACHABLE();
237 return spv::ExecutionMode::Triangles;
238}
239
240spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) {
241 switch (output_topology) {
242 case Tegra::Shader::OutputTopology::PointList:
243 return spv::ExecutionMode::OutputPoints;
244 case Tegra::Shader::OutputTopology::LineStrip:
245 return spv::ExecutionMode::OutputLineStrip;
246 case Tegra::Shader::OutputTopology::TriangleStrip:
247 return spv::ExecutionMode::OutputTriangleStrip;
248 default:
249 UNREACHABLE();
250 return spv::ExecutionMode::OutputPoints;
251 }
252}
253
254/// Returns true if an attribute index is one of the 32 generic attributes
255constexpr bool IsGenericAttribute(Attribute::Index attribute) {
256 return attribute >= Attribute::Index::Attribute_0 &&
257 attribute <= Attribute::Index::Attribute_31;
258}
259
260/// Returns the location of a generic attribute
261u32 GetGenericAttributeLocation(Attribute::Index attribute) {
262 ASSERT(IsGenericAttribute(attribute));
263 return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
264}
265
266/// Returns true if an object has to be treated as precise
267bool IsPrecise(Operation operand) {
268 const auto& meta{operand.GetMeta()};
269 if (std::holds_alternative<MetaArithmetic>(meta)) {
270 return std::get<MetaArithmetic>(meta).precise;
271 }
272 return false;
273}
274
275class SPIRVDecompiler final : public Sirit::Module {
276public:
277 explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
278 const Registry& registry_, const Specialization& specialization_)
279 : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
280 registry{registry_}, specialization{specialization_} {
281 if (stage_ != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
283 }
284
285 AddCapability(spv::Capability::Shader);
286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
287 AddCapability(spv::Capability::ImageQuery);
288 AddCapability(spv::Capability::Image1D);
289 AddCapability(spv::Capability::ImageBuffer);
290 AddCapability(spv::Capability::ImageGatherExtended);
291 AddCapability(spv::Capability::SampledBuffer);
292 AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
293 AddCapability(spv::Capability::DrawParameters);
294 AddCapability(spv::Capability::SubgroupBallotKHR);
295 AddCapability(spv::Capability::SubgroupVoteKHR);
296 AddExtension("SPV_KHR_16bit_storage");
297 AddExtension("SPV_KHR_shader_ballot");
298 AddExtension("SPV_KHR_subgroup_vote");
299 AddExtension("SPV_KHR_storage_buffer_storage_class");
300 AddExtension("SPV_KHR_variable_pointers");
301 AddExtension("SPV_KHR_shader_draw_parameters");
302
303 if (!transform_feedback.empty()) {
304 if (device.IsExtTransformFeedbackSupported()) {
305 AddCapability(spv::Capability::TransformFeedback);
306 } else {
307 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
308 "supported on this device");
309 }
310 }
311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
312 if (ir.UsesViewportIndex()) {
313 AddCapability(spv::Capability::MultiViewport);
314 }
315 if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
316 AddExtension("SPV_EXT_shader_viewport_index_layer");
317 AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
318 }
319 }
320 if (device.IsFormatlessImageLoadSupported()) {
321 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
322 }
323 if (device.IsFloat16Supported()) {
324 AddCapability(spv::Capability::Float16);
325 }
326 t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
327 t_half = Name(TypeVector(t_scalar_half, 2), "half");
328
329 const Id main = Decompile();
330
331 switch (stage) {
332 case ShaderType::Vertex:
333 AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces);
334 break;
335 case ShaderType::TesselationControl:
336 AddCapability(spv::Capability::Tessellation);
337 AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces);
338 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
339 header.common2.threads_per_input_primitive);
340 break;
341 case ShaderType::TesselationEval: {
342 const auto& info = registry.GetGraphicsInfo();
343 AddCapability(spv::Capability::Tessellation);
344 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
345 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
346 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
347 AddExecutionMode(main, info.tessellation_clockwise
348 ? spv::ExecutionMode::VertexOrderCw
349 : spv::ExecutionMode::VertexOrderCcw);
350 break;
351 }
352 case ShaderType::Geometry: {
353 const auto& info = registry.GetGraphicsInfo();
354 AddCapability(spv::Capability::Geometry);
355 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
356 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
357 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
358 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
359 header.common4.max_output_vertices);
360 // TODO(Rodrigo): Where can we get this info from?
361 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
362 break;
363 }
364 case ShaderType::Fragment:
365 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
366 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
367 if (header.ps.omap.depth) {
368 AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
369 }
370 if (specialization.early_fragment_tests) {
371 AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
372 }
373 break;
374 case ShaderType::Compute:
375 const auto workgroup_size = specialization.workgroup_size;
376 AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
377 workgroup_size[1], workgroup_size[2]);
378 AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces);
379 break;
380 }
381 }
382
383private:
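    // Emits the shader body: declares the per-stage inputs/outputs, registers, predicates,
    // memories and resource bindings (assigned sequentially from specialization.base_binding),
    // then builds a void main() from the structured AST when available or from branch mode.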
384 Id Decompile() {
385 DeclareCommon();
386 DeclareVertex();
387 DeclareTessControl();
388 DeclareTessEval();
389 DeclareGeometry();
390 DeclareFragment();
391 DeclareCompute();
392 DeclareRegisters();
393 DeclareCustomVariables();
394 DeclarePredicates();
395 DeclareLocalMemory();
396 DeclareSharedMemory();
397 DeclareInternalFlags();
398 DeclareInputAttributes();
399 DeclareOutputAttributes();
400
401 u32 binding = specialization.base_binding;
402 binding = DeclareConstantBuffers(binding);
403 binding = DeclareGlobalBuffers(binding);
404 binding = DeclareUniformTexels(binding);
405 binding = DeclareSamplers(binding);
406 binding = DeclareStorageTexels(binding);
407 binding = DeclareImages(binding);
408
409 const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
410 AddLabel();
411
412 if (ir.IsDecompiled()) {
413 DeclareFlowVariables();
414 DecompileAST();
415 } else {
416 AllocateLabels();
417 DecompileBranchMode();
418 }
419
420 OpReturn();
421 OpFunctionEnd();
422
423 return main;
424 }
425
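    // Stage-specific setup emitted at the start of main: vertex shaders clear Position (and write
    // the specialized PointSize) so later reads do not see undefined values.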
426 void DefinePrologue() {
427 if (stage == ShaderType::Vertex) {
428 // Clear Position to avoid reading trash on the Z conversion.
429 const auto position_index = out_indices.position.value();
430 const Id position = AccessElement(t_out_float4, out_vertex, position_index);
431 OpStore(position, v_varying_default);
432
433 if (specialization.point_size) {
434 const u32 point_size_index = out_indices.point_size.value();
435 const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index);
436 OpStore(out_point_size, Constant(t_float, *specialization.point_size));
437 }
438 }
439 }
440
441 void DecompileAST();
442
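    // Emits unstructured control flow as one loop around an OpSwitch on the jmp_to variable, with
    // one case per basic block; each block falls through to the block that follows it in address
    // order. SSY and PBK return addresses are modelled with two function-local stacks
    // (CreateFlowStack). Rough shape of the emitted code (a sketch, not literal SPIR-V):
    //
    //     uint jmp_to = first_address;
    //     for (;;) {
    //         switch (jmp_to) {
    //         case 0x0:  /* basic block */
    //         case 0x10: /* basic block */
    //         default:   return;
    //         }
    //     }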
443 void DecompileBranchMode() {
444 const u32 first_address = ir.GetBasicBlocks().begin()->first;
445 const Id loop_label = OpLabel("loop");
446 const Id merge_label = OpLabel("merge");
447 const Id dummy_label = OpLabel();
448 const Id jump_label = OpLabel();
449 continue_label = OpLabel("continue");
450
451 std::vector<Sirit::Literal> literals;
452 std::vector<Id> branch_labels;
453 for (const auto& [literal, label] : labels) {
454 literals.push_back(literal);
455 branch_labels.push_back(label);
456 }
457
458 jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
459 spv::StorageClass::Function, Constant(t_uint, first_address));
460 AddLocalVariable(jmp_to);
461
462 std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
463 std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
464
465 Name(jmp_to, "jmp_to");
466 Name(ssy_flow_stack, "ssy_flow_stack");
467 Name(ssy_flow_stack_top, "ssy_flow_stack_top");
468 Name(pbk_flow_stack, "pbk_flow_stack");
469 Name(pbk_flow_stack_top, "pbk_flow_stack_top");
470
471 DefinePrologue();
472
473 OpBranch(loop_label);
474 AddLabel(loop_label);
475 OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
476 OpBranch(dummy_label);
477
478 AddLabel(dummy_label);
479 const Id default_branch = OpLabel();
480 const Id jmp_to_load = OpLoad(t_uint, jmp_to);
481 OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone);
482 OpSwitch(jmp_to_load, default_branch, literals, branch_labels);
483
484 AddLabel(default_branch);
485 OpReturn();
486
487 for (const auto& [address, bb] : ir.GetBasicBlocks()) {
488 AddLabel(labels.at(address));
489
490 VisitBasicBlock(bb);
491
492 const auto next_it = labels.lower_bound(address + 1);
493 const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
494 OpBranch(next_label);
495 }
496
497 AddLabel(jump_label);
498 OpBranch(continue_label);
499 AddLabel(continue_label);
500 OpBranch(loop_label);
501 AddLabel(merge_label);
502 }
503
504private:
505 friend class ASTDecompiler;
506 friend class ExprDecompiler;
507
508 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
509
510 void AllocateLabels() {
511 for (const auto& pair : ir.GetBasicBlocks()) {
512 const u32 address = pair.first;
513 labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
514 }
515 }
516
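    // Subgroup builtins shared by every stage: the invocation's index within its subgroup and the
    // eq/ge/gt/le/lt ballot masks used by warp operations.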
517 void DeclareCommon() {
518 thread_id =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
520 thread_masks[0] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
522 thread_masks[1] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
524 thread_masks[2] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
526 thread_masks[3] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
528 thread_masks[4] =
529 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
530 }
531
532 void DeclareVertex() {
533 if (stage != ShaderType::Vertex) {
534 return;
535 }
536 Id out_vertex_struct;
537 std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
538 const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
539 out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output);
540 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
541
542 // Declare input attributes
543 vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
544 instance_index =
545 DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
546 base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
547 base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
548 }
549
550 void DeclareTessControl() {
551 if (stage != ShaderType::TesselationControl) {
552 return;
553 }
554 DeclareInputVertexArray(NumInputPatches);
555 DeclareOutputVertexArray(header.common2.threads_per_input_primitive);
556
557 tess_level_outer = DeclareBuiltIn(
558 spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output,
559 TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))),
560 "tess_level_outer");
561 Decorate(tess_level_outer, spv::Decoration::Patch);
562
563 tess_level_inner = DeclareBuiltIn(
564 spv::BuiltIn::TessLevelInner, spv::StorageClass::Output,
565 TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))),
566 "tess_level_inner");
567 Decorate(tess_level_inner, spv::Decoration::Patch);
568
569 invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id");
570 }
571
572 void DeclareTessEval() {
573 if (stage != ShaderType::TesselationEval) {
574 return;
575 }
576 DeclareInputVertexArray(NumInputPatches);
577 DeclareOutputVertex();
578
579 tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord");
580 }
581
582 void DeclareGeometry() {
583 if (stage != ShaderType::Geometry) {
584 return;
585 }
586 const auto& info = registry.GetGraphicsInfo();
587 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
588 DeclareInputVertexArray(num_input);
589 DeclareOutputVertex();
590 }
591
592 void DeclareFragment() {
593 if (stage != ShaderType::Fragment) {
594 return;
595 }
596
597 for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) {
598 if (!IsRenderTargetEnabled(rt)) {
599 continue;
600 }
601 const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
602 Name(id, fmt::format("frag_color{}", rt));
603 Decorate(id, spv::Decoration::Location, rt);
604
605 frag_colors[rt] = id;
606 interfaces.push_back(id);
607 }
608
609 if (header.ps.omap.depth) {
610 frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
611 Name(frag_depth, "frag_depth");
612 Decorate(frag_depth, spv::Decoration::BuiltIn,
613 static_cast<u32>(spv::BuiltIn::FragDepth));
614
615 interfaces.push_back(frag_depth);
616 }
617
618 frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord");
619 front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing");
620 point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord");
621 }
622
623 void DeclareCompute() {
624 if (stage != ShaderType::Compute) {
625 return;
626 }
627
628 workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id");
629 local_invocation_id =
630 DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id");
631 }
632
633 void DeclareRegisters() {
634 for (const u32 gpr : ir.GetRegisters()) {
635 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
636 Name(id, fmt::format("gpr_{}", gpr));
637 registers.emplace(gpr, AddGlobalVariable(id));
638 }
639 }
640
641 void DeclareCustomVariables() {
642 const u32 num_custom_variables = ir.GetNumCustomVariables();
643 for (u32 i = 0; i < num_custom_variables; ++i) {
644 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
645 Name(id, fmt::format("custom_var_{}", i));
646 custom_variables.emplace(i, AddGlobalVariable(id));
647 }
648 }
649
650 void DeclarePredicates() {
651 for (const auto pred : ir.GetPredicates()) {
652 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
653 Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
654 predicates.emplace(pred, AddGlobalVariable(id));
655 }
656 }
657
658 void DeclareFlowVariables() {
659 for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
660 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
661 Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
662 flow_variables.emplace(i, AddGlobalVariable(id));
663 }
664 }
665
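    // Local memory is modelled as a Private array of 32-bit floats; the element count is the byte
    // size aligned up to 4 and divided by 4.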
666 void DeclareLocalMemory() {
667 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
668 // specialization time.
669 const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize();
670 if (lmem_size == 0) {
671 return;
672 }
673 const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4);
674 const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
675 const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
676 Name(type_pointer, "LocalMemory");
677
678 local_memory =
679 OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
680 AddGlobalVariable(Name(local_memory, "local_memory"));
681 }
682
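    // Shared memory is a Workgroup array of uints sized from the specialization constants and
    // clamped to the device's reported compute shared memory limit.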
683 void DeclareSharedMemory() {
684 if (stage != ShaderType::Compute) {
685 return;
686 }
687 t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
688
689 u32 smem_size = specialization.shared_memory_size * 4;
690 if (smem_size == 0) {
691 // Avoid declaring an empty array.
692 return;
693 }
694 const u32 limit = device.GetMaxComputeSharedMemorySize();
695 if (smem_size > limit) {
696 LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
697 smem_size, limit);
698 smem_size = limit;
699 }
700
701 const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
702 const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
703 Name(type_pointer, "SharedMemory");
704
705 shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup);
706 AddGlobalVariable(Name(shared_memory, "shared_memory"));
707 }
708
709 void DeclareInternalFlags() {
710 static constexpr std::array names{"zero", "sign", "carry", "overflow"};
711
712 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
713 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
714 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
715 }
716 }
717
718 void DeclareInputVertexArray(u32 length) {
719 constexpr auto storage = spv::StorageClass::Input;
720 std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length);
721 }
722
723 void DeclareOutputVertexArray(u32 length) {
724 constexpr auto storage = spv::StorageClass::Output;
725 std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length);
726 }
727
728 std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class,
729 std::string name, u32 length) {
730 const auto [struct_id, indices] = DeclareVertexStruct();
731 const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length));
732 const Id vertex_ptr = TypePointer(storage_class, vertex_array);
733 const Id vertex = OpVariable(vertex_ptr, storage_class);
734 AddGlobalVariable(Name(vertex, std::move(name)));
735 interfaces.push_back(vertex);
736 return {indices, vertex};
737 }
738
739 void DeclareOutputVertex() {
740 Id out_vertex_struct;
741 std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
742 const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
743 out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output);
744 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
745 }
746
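    // Generic input attributes: only enabled locations are declared. Tessellation and geometry
    // stages receive them as per-vertex arrays, and fragment shaders pick their interpolation
    // decoration (Flat / NoPerspective) from the pixel input map.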
747 void DeclareInputAttributes() {
748 for (const auto index : ir.GetInputAttributes()) {
749 if (!IsGenericAttribute(index)) {
750 continue;
751 }
752 const u32 location = GetGenericAttributeLocation(index);
753 if (!IsAttributeEnabled(location)) {
754 continue;
755 }
756 const auto type_descriptor = GetAttributeType(location);
757 Id type;
758 if (IsInputAttributeArray()) {
759 type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3);
760 type = TypeArray(type, Constant(t_uint, GetNumInputVertices()));
761 type = TypePointer(spv::StorageClass::Input, type);
762 } else {
763 type = type_descriptor.vector;
764 }
765 const Id id = OpVariable(type, spv::StorageClass::Input);
766 AddGlobalVariable(Name(id, fmt::format("in_attr{}", location)));
767 input_attributes.emplace(index, id);
768 interfaces.push_back(id);
769
770 Decorate(id, spv::Decoration::Location, location);
771
772 if (stage != ShaderType::Fragment) {
773 continue;
774 }
775 switch (header.ps.GetPixelImap(location)) {
776 case PixelImap::Constant:
777 Decorate(id, spv::Decoration::Flat);
778 break;
779 case PixelImap::Perspective:
780 // Default
781 break;
782 case PixelImap::ScreenLinear:
783 Decorate(id, spv::Decoration::NoPerspective);
784 break;
785 default:
786 UNREACHABLE_MSG("Unused attribute being fetched");
787 }
788 }
789 }
790
791 void DeclareOutputAttributes() {
792 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
793 return;
794 }
795
796 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
797 for (const auto index : ir.GetOutputAttributes()) {
798 if (!IsGenericAttribute(index)) {
799 continue;
800 }
801 DeclareOutputAttribute(index);
802 }
803 }
804
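    // Generic outputs are declared in component chunks so transform feedback can address them:
    // each chunk covers either the components recorded in the TFB layout or the remaining ones,
    // and is decorated with Location/Component plus XfbBuffer/XfbStride/Offset when the transform
    // feedback extension is available.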
805 void DeclareOutputAttribute(Attribute::Index index) {
806 static constexpr std::string_view swizzle = "xyzw";
807
808 const u32 location = GetGenericAttributeLocation(index);
809 u8 element = 0;
810 while (element < 4) {
811 const std::size_t remainder = 4 - element;
812
813 std::size_t num_components = remainder;
814 const std::optional tfb = GetTransformFeedbackInfo(index, element);
815 if (tfb) {
816 num_components = tfb->components;
817 }
818
819 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
820 Id varying_default = v_varying_default;
821 if (IsOutputAttributeArray()) {
822 const u32 num = GetNumOutputVertices();
823 type = TypeArray(type, Constant(t_uint, num));
824 if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
825                     // Intel's proprietary driver fails to set up defaults for arrayed output
826                     // attributes.
827 varying_default = ConstantComposite(type, std::vector(num, varying_default));
828 }
829 }
830 type = TypePointer(spv::StorageClass::Output, type);
831
832 std::string name = fmt::format("out_attr{}", location);
833 if (num_components < 4 || element > 0) {
834 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
835 }
836
837 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
838 Name(AddGlobalVariable(id), name);
839
840 GenericVaryingDescription description;
841 description.id = id;
842 description.first_element = element;
843 description.is_scalar = num_components == 1;
844 for (u32 i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
846 output_attributes.emplace(offset, description);
847 }
848 interfaces.push_back(id);
849
850 Decorate(id, spv::Decoration::Location, location);
851 if (element > 0) {
852 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
853 }
854 if (tfb && device.IsExtTransformFeedbackSupported()) {
855 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
856 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
857 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
858 }
859
860 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
861 }
862 }
863
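    // Looks up the transform feedback entry for an attribute component; entries are keyed by
    // attribute index * 4 + element.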
864 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
865 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
866 const auto it = transform_feedback.find(location);
867 if (it == transform_feedback.end()) {
868 return {};
869 }
870 return it->second;
871 }
872
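    // Constant buffers use the scalar-layout UBO type (t_cbuf_scalar_ubo) when
    // VK_KHR_uniform_buffer_standard_layout is available and the std140 type otherwise; bindings
    // are assigned sequentially within DESCRIPTOR_SET.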
873 u32 DeclareConstantBuffers(u32 binding) {
874 for (const auto& [index, size] : ir.GetConstantBuffers()) {
875 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
876 : t_cbuf_std140_ubo;
877 const Id id = OpVariable(type, spv::StorageClass::Uniform);
878 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
879
880 Decorate(id, spv::Decoration::Binding, binding++);
881 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
882 constant_buffers.emplace(index, id);
883 }
884 return binding;
885 }
886
887 u32 DeclareGlobalBuffers(u32 binding) {
888 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
889 const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
890 AddGlobalVariable(
891 Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
892
893 Decorate(id, spv::Decoration::Binding, binding++);
894 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
895 global_buffers.emplace(base, id);
896 }
897 return binding;
898 }
899
900 u32 DeclareUniformTexels(u32 binding) {
901 for (const auto& sampler : ir.GetSamplers()) {
902 if (!sampler.is_buffer) {
903 continue;
904 }
905 ASSERT(!sampler.is_array);
906 ASSERT(!sampler.is_shadow);
907
908 constexpr auto dim = spv::Dim::Buffer;
909 constexpr int depth = 0;
910 constexpr int arrayed = 0;
911 constexpr bool ms = false;
912 constexpr int sampled = 1;
913 constexpr auto format = spv::ImageFormat::Unknown;
914 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
915 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
916 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
917 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
918 Decorate(id, spv::Decoration::Binding, binding++);
919 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
920
921 uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
922 }
923 return binding;
924 }
925
926 u32 DeclareSamplers(u32 binding) {
927 for (const auto& sampler : ir.GetSamplers()) {
928 if (sampler.is_buffer) {
929 continue;
930 }
931 const auto dim = GetSamplerDim(sampler);
932 const int depth = sampler.is_shadow ? 1 : 0;
933 const int arrayed = sampler.is_array ? 1 : 0;
934 constexpr bool ms = false;
935 constexpr int sampled = 1;
936 constexpr auto format = spv::ImageFormat::Unknown;
937 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
938 const Id sampler_type = TypeSampledImage(image_type);
939 const Id sampler_pointer_type =
940 TypePointer(spv::StorageClass::UniformConstant, sampler_type);
941 const Id type = sampler.is_indexed
942 ? TypeArray(sampler_type, Constant(t_uint, sampler.size))
943 : sampler_type;
944 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
945 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
946 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
947 Decorate(id, spv::Decoration::Binding, binding++);
948 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
949
950 sampled_images.emplace(
951 sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id});
952 }
953 return binding;
954 }
955
956 u32 DeclareStorageTexels(u32 binding) {
957 for (const auto& image : ir.GetImages()) {
958 if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
959 continue;
960 }
961 DeclareImage(image, binding);
962 }
963 return binding;
964 }
965
966 u32 DeclareImages(u32 binding) {
967 for (const auto& image : ir.GetImages()) {
968 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
969 continue;
970 }
971 DeclareImage(image, binding);
972 }
973 return binding;
974 }
975
976 void DeclareImage(const ImageEntry& image, u32& binding) {
977 const auto [dim, arrayed] = GetImageDim(image);
978 constexpr int depth = 0;
979 constexpr bool ms = false;
980 constexpr int sampled = 2; // This won't be accessed with a sampler
981 const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
982 const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
983 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
984 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
985 AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
986
987 Decorate(id, spv::Decoration::Binding, binding++);
988 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
989 if (image.is_read && !image.is_written) {
990 Decorate(id, spv::Decoration::NonWritable);
991 } else if (image.is_written && !image.is_read) {
992 Decorate(id, spv::Decoration::NonReadable);
993 }
994
995 images.emplace(image.index, StorageImage{image_type, id});
996 }
997
998 bool IsRenderTargetEnabled(u32 rt) const {
999 for (u32 component = 0; component < 4; ++component) {
1000 if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
1001 return true;
1002 }
1003 }
1004 return false;
1005 }
1006
1007 bool IsInputAttributeArray() const {
1008 return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval ||
1009 stage == ShaderType::Geometry;
1010 }
1011
1012 bool IsOutputAttributeArray() const {
1013 return stage == ShaderType::TesselationControl;
1014 }
1015
1016 bool IsAttributeEnabled(u32 location) const {
1017 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
1018 }
1019
1020 u32 GetNumInputVertices() const {
1021 switch (stage) {
1022 case ShaderType::Geometry:
1023 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
1024 case ShaderType::TesselationControl:
1025 case ShaderType::TesselationEval:
1026 return NumInputPatches;
1027 default:
1028 UNREACHABLE();
1029 return 1;
1030 }
1031 }
1032
1033 u32 GetNumOutputVertices() const {
1034 switch (stage) {
1035 case ShaderType::TesselationControl:
1036 return header.common2.threads_per_input_primitive;
1037 default:
1038 UNREACHABLE();
1039 return 1;
1040 }
1041 }
1042
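    // Builds a Block struct analogous to gl_PerVertex containing only the builtins the shader
    // uses: Position, optionally Layer/ViewportIndex/PointSize, and an eight-element ClipDistance
    // array when clip distances are written.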
1043 std::tuple<Id, VertexIndices> DeclareVertexStruct() {
1044 struct BuiltIn {
1045 Id type;
1046 spv::BuiltIn builtin;
1047 const char* name;
1048 };
1049 std::vector<BuiltIn> members;
1050 members.reserve(4);
1051
1052 const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) {
1053 const auto index = static_cast<u32>(members.size());
1054 members.push_back(BuiltIn{type, builtin, name});
1055 return index;
1056 };
1057
1058 VertexIndices indices;
1059 indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
1060
1061 if (ir.UsesLayer()) {
1062 if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
1063 indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
1064 } else {
1065 LOG_ERROR(
1066 Render_Vulkan,
1067                    "Shader requires Layer, but it is not supported for this stage on this device.");
1068 }
1069 }
1070
1071 if (ir.UsesViewportIndex()) {
1072 if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
1073 indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
1074 } else {
1075                LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex, but it is not "
1076                                         "supported for this stage on this device.");
1077 }
1078 }
1079
1080 if (ir.UsesPointSize() || specialization.point_size) {
1081 indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
1082 }
1083
1084 const auto& ir_output_attributes = ir.GetOutputAttributes();
1085 const bool declare_clip_distances = std::any_of(
1086 ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
1087 return index == Attribute::Index::ClipDistances0123 ||
1088 index == Attribute::Index::ClipDistances4567;
1089 });
1090 if (declare_clip_distances) {
1091 indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)),
1092 spv::BuiltIn::ClipDistance, "clip_distances");
1093 }
1094
1095 std::vector<Id> member_types;
1096 member_types.reserve(members.size());
1097 for (std::size_t i = 0; i < members.size(); ++i) {
1098 member_types.push_back(members[i].type);
1099 }
1100 const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex");
1101 Decorate(per_vertex_struct, spv::Decoration::Block);
1102
1103 for (std::size_t index = 0; index < members.size(); ++index) {
1104 const auto& member = members[index];
1105 MemberName(per_vertex_struct, static_cast<u32>(index), member.name);
1106 MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn,
1107 static_cast<u32>(member.builtin));
1108 }
1109
1110 return {per_vertex_struct, indices};
1111 }
1112
1113 void VisitBasicBlock(const NodeBlock& bb) {
1114 for (const auto& node : bb) {
1115 Visit(node);
1116 }
1117 }
1118
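    // Translates one IR node into SPIR-V. Operations are dispatched through the
    // operation_decompilers table; registers, immediates, predicates, attribute buffers, constant
    // buffers, global/local/shared memory, internal flags, conditionals and comments are handled
    // inline below.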
1119 Expression Visit(const Node& node) {
1120 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1121 if (const auto amend_index = operation->GetAmendIndex()) {
1122 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1123 ASSERT(type == Type::Void);
1124 }
1125 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
1126 const auto decompiler = operation_decompilers[operation_index];
1127 if (decompiler == nullptr) {
1128 UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
1129 }
1130 return (this->*decompiler)(*operation);
1131 }
1132
1133 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1134 const u32 index = gpr->GetIndex();
1135 if (index == Register::ZeroIndex) {
1136 return {v_float_zero, Type::Float};
1137 }
1138 return {OpLoad(t_float, registers.at(index)), Type::Float};
1139 }
1140
1141 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1142 const u32 index = cv->GetIndex();
1143 return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
1144 }
1145
1146 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1147 return {Constant(t_uint, immediate->GetValue()), Type::Uint};
1148 }
1149
1150 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1151 const auto value = [&]() -> Id {
1152 switch (const auto index = predicate->GetIndex(); index) {
1153 case Tegra::Shader::Pred::UnusedIndex:
1154 return v_true;
1155 case Tegra::Shader::Pred::NeverExecute:
1156 return v_false;
1157 default:
1158 return OpLoad(t_bool, predicates.at(index));
1159 }
1160 }();
1161 if (predicate->IsNegated()) {
1162 return {OpLogicalNot(t_bool, value), Type::Bool};
1163 }
1164 return {value, Type::Bool};
1165 }
1166
1167 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1168 const auto attribute = abuf->GetIndex();
1169 const u32 element = abuf->GetElement();
1170 const auto& buffer = abuf->GetBuffer();
1171
1172 const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
1173 std::vector<Id> members;
1174 members.reserve(std::size(indices) + 1);
1175
1176 if (buffer && IsInputAttributeArray()) {
1177 members.push_back(AsUint(Visit(buffer)));
1178 }
1179 for (const u32 index : indices) {
1180 members.push_back(Constant(t_uint, index));
1181 }
1182 return OpAccessChain(pointer_type, composite, members);
1183 };
1184
1185 switch (attribute) {
1186 case Attribute::Index::Position: {
1187 if (stage == ShaderType::Fragment) {
1188 return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
1189 Type::Float};
1190 }
1191 const std::vector elements = {in_indices.position.value(), element};
1192 return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float};
1193 }
1194 case Attribute::Index::PointCoord: {
1195 switch (element) {
1196 case 0:
1197 case 1:
1198 return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element),
1199 Type::Float};
1200 }
1201 UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element);
1202 return {v_float_zero, Type::Float};
1203 }
1204 case Attribute::Index::TessCoordInstanceIDVertexID:
1205                // TODO(Subv): Find out what the values of the first two elements are when inside a
1206                // vertex shader, and what the value of the fourth element is when inside a Tess Eval
1207                // shader.
1208 switch (element) {
1209 case 0:
1210 case 1:
1211 return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
1212 Type::Float};
1213 case 2:
1214 return {
1215 OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
1216 Type::Int};
1217 case 3:
1218 return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
1219 Type::Int};
1220 }
1221            UNIMPLEMENTED_MSG("Unhandled TessCoordInstanceIDVertexID element={}", element);
1222 return {Constant(t_uint, 0U), Type::Uint};
1223 case Attribute::Index::FrontFacing:
1224 // TODO(Subv): Find out what the values are for the other elements.
1225 ASSERT(stage == ShaderType::Fragment);
1226 if (element == 3) {
1227 const Id is_front_facing = OpLoad(t_bool, front_facing);
1228 const Id true_value = Constant(t_int, static_cast<s32>(-1));
1229 const Id false_value = Constant(t_int, 0);
1230 return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int};
1231 }
1232            UNIMPLEMENTED_MSG("Unhandled FrontFacing element={}", element);
1233 return {v_float_zero, Type::Float};
1234 default:
1235 if (!IsGenericAttribute(attribute)) {
1236 break;
1237 }
1238 const u32 location = GetGenericAttributeLocation(attribute);
1239 if (!IsAttributeEnabled(location)) {
1240 // Disabled attributes (also known as constant attributes) always return zero.
1241 return {v_float_zero, Type::Float};
1242 }
1243 const auto type_descriptor = GetAttributeType(location);
1244 const Type type = type_descriptor.type;
1245 const Id attribute_id = input_attributes.at(attribute);
1246 const std::vector elements = {element};
1247 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1248 return {OpLoad(GetTypeDefinition(type), pointer), type};
1249 }
1250 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1251 return {v_float_zero, Type::Float};
1252 }
1253
1254 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1255 const Node& offset = cbuf->GetOffset();
1256 const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
1257
1258 Id pointer{};
1259 if (device.IsKhrUniformBufferStandardLayoutSupported()) {
1260 const Id buffer_offset =
1261 OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U));
1262 pointer =
1263 OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset);
1264 } else {
1265 Id buffer_index{};
1266 Id buffer_element{};
1267 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1268 // Direct access
1269 const u32 offset_imm = immediate->GetValue();
1270 ASSERT(offset_imm % 4 == 0);
1271 buffer_index = Constant(t_uint, offset_imm / 16);
1272 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
1273 } else if (std::holds_alternative<OperationNode>(*offset)) {
1274 // Indirect access
1275 const Id offset_id = AsUint(Visit(offset));
1276 const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4));
1277 const Id final_offset =
1278 OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1));
1279 buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4));
1280 buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4));
1281 } else {
1282                    UNREACHABLE_MSG("Unhandled offset node type");
1283 }
1284 pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
1285 buffer_element);
1286 }
1287 return {OpLoad(t_float, pointer), Type::Float};
1288 }
1289
1290 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1291 return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
1292 }
1293
1294 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1295 Id address = AsUint(Visit(lmem->GetAddress()));
1296 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1297 const Id pointer = OpAccessChain(t_prv_float, local_memory, address);
1298 return {OpLoad(t_float, pointer), Type::Float};
1299 }
1300
1301 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1302 return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
1303 }
1304
1305 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1306 const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag()));
1307 return {OpLoad(t_bool, flag), Type::Bool};
1308 }
1309
1310 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1311 if (const auto amend_index = conditional->GetAmendIndex()) {
1312 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1313 ASSERT(type == Type::Void);
1314 }
1315            // It is invalid to call a conditional on nested nodes; use an operation instead
1316 const Id true_label = OpLabel();
1317 const Id skip_label = OpLabel();
1318 const Id condition = AsBool(Visit(conditional->GetCondition()));
1319 OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone);
1320 OpBranchConditional(condition, true_label, skip_label);
1321 AddLabel(true_label);
1322
1323 conditional_branch_set = true;
1324 inside_branch = false;
1325 VisitBasicBlock(conditional->GetCode());
1326 conditional_branch_set = false;
1327 if (!inside_branch) {
1328 OpBranch(skip_label);
1329 } else {
1330 inside_branch = false;
1331 }
1332 AddLabel(skip_label);
1333 return {};
1334 }
1335
1336 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1337 if (device.HasDebuggingToolAttached()) {
1338 // We should insert comments with OpString instead of using named variables
1339 Name(OpUndef(t_int), comment->GetText());
1340 }
1341 return {};
1342 }
1343
1344 UNREACHABLE();
1345 return {};
1346 }
1347
1348 template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
1349 Expression Unary(Operation operation) {
1350 const Id type_def = GetTypeDefinition(result_type);
1351 const Id op_a = As(Visit(operation[0]), type_a);
1352
1353 const Id value = (this->*func)(type_def, op_a);
1354 if (IsPrecise(operation)) {
1355 Decorate(value, spv::Decoration::NoContraction);
1356 }
1357 return {value, result_type};
1358 }
1359
1360 template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
1361 Type type_b = type_a>
1362 Expression Binary(Operation operation) {
1363 const Id type_def = GetTypeDefinition(result_type);
1364 const Id op_a = As(Visit(operation[0]), type_a);
1365 const Id op_b = As(Visit(operation[1]), type_b);
1366
1367 const Id value = (this->*func)(type_def, op_a, op_b);
1368 if (IsPrecise(operation)) {
1369 Decorate(value, spv::Decoration::NoContraction);
1370 }
1371 return {value, result_type};
1372 }
1373
1374 template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
1375 Type type_b = type_a, Type type_c = type_b>
1376 Expression Ternary(Operation operation) {
1377 const Id type_def = GetTypeDefinition(result_type);
1378 const Id op_a = As(Visit(operation[0]), type_a);
1379 const Id op_b = As(Visit(operation[1]), type_b);
1380 const Id op_c = As(Visit(operation[2]), type_c);
1381
1382 const Id value = (this->*func)(type_def, op_a, op_b, op_c);
1383 if (IsPrecise(operation)) {
1384 Decorate(value, spv::Decoration::NoContraction);
1385 }
1386 return {value, result_type};
1387 }
1388
1389 template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
1390 Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
1391 Expression Quaternary(Operation operation) {
1392 const Id type_def = GetTypeDefinition(result_type);
1393 const Id op_a = As(Visit(operation[0]), type_a);
1394 const Id op_b = As(Visit(operation[1]), type_b);
1395 const Id op_c = As(Visit(operation[2]), type_c);
1396 const Id op_d = As(Visit(operation[3]), type_d);
1397
1398 const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d);
1399 if (IsPrecise(operation)) {
1400 Decorate(value, spv::Decoration::NoContraction);
1401 }
1402 return {value, result_type};
1403 }
1404
1405 Expression Assign(Operation operation) {
1406 const Node& dest = operation[0];
1407 const Node& src = operation[1];
1408
1409 Expression target{};
1410 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1411 if (gpr->GetIndex() == Register::ZeroIndex) {
1412                // Writing to Register::ZeroIndex is a no-op, but we still have to visit its source
1413                // because it might have side effects.
1414 Visit(src);
1415 return {};
1416 }
1417 target = {registers.at(gpr->GetIndex()), Type::Float};
1418
1419 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1420 const auto& buffer = abuf->GetBuffer();
1421 const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
1422 std::vector<Id> members;
1423 members.reserve(std::size(indices) + 1);
1424
1425 if (buffer && IsOutputAttributeArray()) {
1426 members.push_back(AsUint(Visit(buffer)));
1427 }
1428 for (const u32 index : indices) {
1429 members.push_back(Constant(t_uint, index));
1430 }
1431 return OpAccessChain(pointer_type, composite, members);
1432 };
1433
1434 target = [&]() -> Expression {
1435 const u32 element = abuf->GetElement();
1436 switch (const auto attribute = abuf->GetIndex(); attribute) {
1437 case Attribute::Index::Position: {
1438 const u32 index = out_indices.position.value();
1439 return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float};
1440 }
1441 case Attribute::Index::LayerViewportPointSize:
1442 switch (element) {
1443 case 1: {
1444 if (!out_indices.layer) {
1445 return {};
1446 }
1447 const u32 index = out_indices.layer.value();
1448 return {AccessElement(t_out_int, out_vertex, index), Type::Int};
1449 }
1450 case 2: {
1451 if (!out_indices.viewport) {
1452 return {};
1453 }
1454 const u32 index = out_indices.viewport.value();
1455 return {AccessElement(t_out_int, out_vertex, index), Type::Int};
1456 }
1457 case 3: {
1458 const auto index = out_indices.point_size.value();
1459 return {AccessElement(t_out_float, out_vertex, index), Type::Float};
1460 }
1461 default:
1462                    UNIMPLEMENTED_MSG("LayerViewportPointSize element={}", abuf->GetElement());
1463 return {};
1464 }
1465 case Attribute::Index::ClipDistances0123: {
1466 const u32 index = out_indices.clip_distances.value();
1467 return {AccessElement(t_out_float, out_vertex, index, element), Type::Float};
1468 }
1469 case Attribute::Index::ClipDistances4567: {
1470 const u32 index = out_indices.clip_distances.value();
1471 return {AccessElement(t_out_float, out_vertex, index, element + 4),
1472 Type::Float};
1473 }
1474 default:
1475 if (IsGenericAttribute(attribute)) {
1476 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1477 const GenericVaryingDescription description = output_attributes.at(offset);
1478 const Id composite = description.id;
1479 std::vector<u32> indices;
1480 if (!description.is_scalar) {
1481 indices.push_back(element - description.first_element);
1482 }
1483 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1484 }
1485 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1486 static_cast<u32>(attribute));
1487 return {};
1488 }
1489 }();
1490
1491 } else if (const auto patch = std::get_if<PatchNode>(&*dest)) {
1492 target = [&]() -> Expression {
1493 const u32 offset = patch->GetOffset();
1494 switch (offset) {
1495 case 0:
1496 case 1:
1497 case 2:
1498 case 3:
1499 return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float};
1500 case 4:
1501 case 5:
1502 return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float};
1503 }
1504 UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset);
1505 return {};
1506 }();
1507
1508 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1509 Id address = AsUint(Visit(lmem->GetAddress()));
1510 address = OpUDiv(t_uint, address, Constant(t_uint, 4));
1511 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
1512
1513 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1514 target = {GetSharedMemoryPointer(*smem), Type::Uint};
1515
1516 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1517 target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
1518
1519 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1520 target = {custom_variables.at(cv->GetIndex()), Type::Float};
1521
1522 } else {
1523 UNIMPLEMENTED();
1524 }
1525
1526 if (!target.id) {
1527            // If resolving the target failed above, target.id is null; skip the store.
1528 return {};
1529 }
1530
1531 OpStore(target.id, As(Visit(src), target.type));
1532 return {};
1533 }
1534
1535 template <u32 offset>
1536 Expression FCastHalf(Operation operation) {
1537 const Id value = AsHalfFloat(Visit(operation[0]));
1538 return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)),
1539 Type::Float};
1540 }
1541
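    // Float swizzled add: the thread's index within its group of four lanes (thread_id & 3)
    // selects a 2-bit field from operation[2], which in turn picks per-operand sign modifiers
    // (-1, +1 or 0) from two constant lookup tables.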
1542 Expression FSwizzleAdd(Operation operation) {
1543 const Id minus = Constant(t_float, -1.0f);
1544 const Id plus = v_float_one;
1545 const Id zero = v_float_zero;
1546 const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero);
1547 const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus);
1548
1549 Id mask = OpLoad(t_uint, thread_id);
1550 mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
1551 mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1));
1552 mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask);
1553 mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
1554
1555 const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask);
1556 const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask);
1557
1558 const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a);
1559 const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b);
1560 return {OpFAdd(t_float, op_a, op_b), Type::Float};
1561 }
1562
1563 Expression HNegate(Operation operation) {
1564 const bool is_f16 = device.IsFloat16Supported();
1565 const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000);
1566 const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000);
1567 const auto GetNegate = [&](std::size_t index) {
1568 return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one);
1569 };
1570 const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2));
1571 return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat};
1572 }
1573
1574 Expression HClamp(Operation operation) {
1575 const auto Pack = [&](std::size_t index) {
1576 const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index])));
1577 return OpCompositeConstruct(t_half, scalar, scalar);
1578 };
1579 const Id value = AsHalfFloat(Visit(operation[0]));
1580 const Id min = Pack(1);
1581 const Id max = Pack(2);
1582
1583 const Id clamped = OpFClamp(t_half, value, min, max);
1584 if (IsPrecise(operation)) {
1585 Decorate(clamped, spv::Decoration::NoContraction);
1586 }
1587 return {clamped, Type::HalfFloat};
1588 }
1589
1590 Expression HCastFloat(Operation operation) {
1591 const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
1592 return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat};
1593 }
1594
1595 Expression HUnpack(Operation operation) {
1596 Expression operand = Visit(operation[0]);
1597 const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta());
1598 if (type == Tegra::Shader::HalfType::H0_H1) {
1599 return operand;
1600 }
1601 const auto value = [&] {
1602 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1603 case Tegra::Shader::HalfType::F32:
1604 return GetHalfScalarFromFloat(AsFloat(operand));
1605 case Tegra::Shader::HalfType::H0_H0:
1606 return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0);
1607 case Tegra::Shader::HalfType::H1_H1:
1608 return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1);
1609 default:
1610 UNREACHABLE();
1611 return ConstantNull(t_half);
1612 }
1613 }();
1614 return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat};
1615 }
1616
1617 Expression HMergeF32(Operation operation) {
1618 const Id value = AsHalfFloat(Visit(operation[0]));
1619 return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float};
1620 }
1621
1622 template <u32 offset>
1623 Expression HMergeHN(Operation operation) {
1624 const Id target = AsHalfFloat(Visit(operation[0]));
1625 const Id source = AsHalfFloat(Visit(operation[1]));
1626 const Id object = OpCompositeExtract(t_scalar_half, source, offset);
1627 return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat};
1628 }
1629
1630 Expression HPack2(Operation operation) {
1631 const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
1632 const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1])));
1633 return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat};
1634 }
1635
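    // Returns whether an unsigned addition carried, using the carry member of OpIAddCarry.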
1636 Expression LogicalAddCarry(Operation operation) {
1637 const Id op_a = AsUint(Visit(operation[0]));
1638 const Id op_b = AsUint(Visit(operation[1]));
1639
1640 const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
1641 const Id carry = OpCompositeExtract(t_uint, result, 1);
1642 return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
1643 }
1644
1645 Expression LogicalAssign(Operation operation) {
1646 const Node& dest = operation[0];
1647 const Node& src = operation[1];
1648
1649 Id target{};
1650 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1651 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1652
1653 const auto index = pred->GetIndex();
1654 switch (index) {
1655 case Tegra::Shader::Pred::NeverExecute:
1656 case Tegra::Shader::Pred::UnusedIndex:
1657 // Writing to these predicates is a no-op
1658 return {};
1659 }
1660 target = predicates.at(index);
1661
1662 } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
1663 target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
1664 }
1665
1666 OpStore(target, AsBool(Visit(src)));
1667 return {};
1668 }
1669
1670 Expression LogicalFOrdered(Operation operation) {
1671        // Emulate SPIR-V's OpOrdered (the instruction itself requires the Kernel capability)
1672 const Id op_a = AsFloat(Visit(operation[0]));
1673 const Id op_b = AsFloat(Visit(operation[1]));
1674 const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
1675 const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
1676 return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
1677 }
1678
1679 Expression LogicalFUnordered(Operation operation) {
1680        // Emulate SPIR-V's OpUnordered (the instruction itself requires the Kernel capability)
1681 const Id op_a = AsFloat(Visit(operation[0]));
1682 const Id op_b = AsFloat(Visit(operation[1]));
1683 const Id is_nan_a = OpIsNan(t_bool, op_a);
1684 const Id is_nan_b = OpIsNan(t_bool, op_b);
1685 return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
1686 }
1687
1688 Id GetTextureSampler(Operation operation) {
1689 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1690 ASSERT(!meta.sampler.is_buffer);
1691
1692 const auto& entry = sampled_images.at(meta.sampler.index);
1693 Id sampler = entry.variable;
1694 if (meta.sampler.is_indexed) {
1695 const Id index = AsInt(Visit(meta.index));
1696 sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
1697 }
1698 return OpLoad(entry.sampler_type, sampler);
1699 }
1700
1701 Id GetTextureImage(Operation operation) {
1702 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1703 const u32 index = meta.sampler.index;
1704 if (meta.sampler.is_buffer) {
1705 const auto& entry = uniform_texels.at(index);
1706 return OpLoad(entry.image_type, entry.image);
1707 } else {
1708 const auto& entry = sampled_images.at(index);
1709 return OpImage(entry.image_type, GetTextureSampler(operation));
1710 }
1711 }
1712
1713 Id GetImage(Operation operation) {
1714 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1715 const auto entry = images.at(meta.image.index);
1716 return OpLoad(entry.image_type, entry.image);
1717 }
1718
1719 Id AssembleVector(const std::vector<Id>& coords, Type type) {
1720 const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1);
1721 return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords);
1722 }
1723
1724 Id GetCoordinates(Operation operation, Type type) {
1725 std::vector<Id> coords;
1726 for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
1727 coords.push_back(As(Visit(operation[i]), type));
1728 }
1729 if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) {
1730 // Add array coordinate for textures
1731 if (meta->sampler.is_array) {
1732 Id array = AsInt(Visit(meta->array));
1733 if (type == Type::Float) {
1734 array = OpConvertSToF(t_float, array);
1735 }
1736 coords.push_back(array);
1737 }
1738 }
1739 return AssembleVector(coords, type);
1740 }
1741
1742 Id GetOffsetCoordinates(Operation operation) {
1743 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1744 std::vector<Id> coords;
1745 coords.reserve(meta.aoffi.size());
1746 for (const auto& coord : meta.aoffi) {
1747 coords.push_back(AsInt(Visit(coord)));
1748 }
1749 return AssembleVector(coords, Type::Int);
1750 }
1751
1752 std::pair<Id, Id> GetDerivatives(Operation operation) {
1753 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1754 const auto& derivatives = meta.derivates;
1755 ASSERT(derivatives.size() % 2 == 0);
1756
1757 const std::size_t components = derivatives.size() / 2;
1758 std::vector<Id> dx, dy;
1759 dx.reserve(components);
1760 dy.reserve(components);
1761 for (std::size_t index = 0; index < components; ++index) {
1762 dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0))));
1763 dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1))));
1764 }
1765 return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)};
1766 }
1767
1768 Expression GetTextureElement(Operation operation, Id sample_value, Type type) {
1769 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1770 const auto type_def = GetTypeDefinition(type);
1771 return {OpCompositeExtract(type_def, sample_value, meta.element), type};
1772 }
1773
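    // Implicit-LOD sampling is only legal in fragment shaders; other stages force an explicit LOD
    // of zero. Depth-compare samples return a scalar float instead of extracting from a vec4.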
1774 Expression Texture(Operation operation) {
1775 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1776
1777 const bool can_implicit = stage == ShaderType::Fragment;
1778 const Id sampler = GetTextureSampler(operation);
1779 const Id coords = GetCoordinates(operation, Type::Float);
1780
1781 std::vector<Id> operands;
1782 spv::ImageOperandsMask mask{};
1783 if (meta.bias) {
1784 mask = mask | spv::ImageOperandsMask::Bias;
1785 operands.push_back(AsFloat(Visit(meta.bias)));
1786 }
1787
1788 if (!can_implicit) {
1789 mask = mask | spv::ImageOperandsMask::Lod;
1790 operands.push_back(v_float_zero);
1791 }
1792
1793 if (!meta.aoffi.empty()) {
1794 mask = mask | spv::ImageOperandsMask::Offset;
1795 operands.push_back(GetOffsetCoordinates(operation));
1796 }
1797
1798 if (meta.depth_compare) {
1799 // Depth sampling
1800 UNIMPLEMENTED_IF(meta.bias);
1801 const Id dref = AsFloat(Visit(meta.depth_compare));
1802 if (can_implicit) {
1803 return {
1804 OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
1805 Type::Float};
1806 } else {
1807 return {
1808 OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
1809 Type::Float};
1810 }
1811 }
1812
1813 Id texture;
1814 if (can_implicit) {
1815 texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
1816 } else {
1817 texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
1818 }
1819 return GetTextureElement(operation, texture, Type::Float);
1820 }
1821
1822 Expression TextureLod(Operation operation) {
1823 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1824
1825 const Id sampler = GetTextureSampler(operation);
1826 const Id coords = GetCoordinates(operation, Type::Float);
1827 const Id lod = AsFloat(Visit(meta.lod));
1828
1829 spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
1830 std::vector<Id> operands{lod};
1831
1832 if (!meta.aoffi.empty()) {
1833 mask = mask | spv::ImageOperandsMask::Offset;
1834 operands.push_back(GetOffsetCoordinates(operation));
1835 }
1836
1837 if (meta.sampler.is_shadow) {
1838 const Id dref = AsFloat(Visit(meta.depth_compare));
1839 return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
1840 Type::Float};
1841 }
1842 const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
1843 return GetTextureElement(operation, texture, Type::Float);
1844 }
1845
1846 Expression TextureGather(Operation operation) {
1847 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1848
1849 const Id coords = GetCoordinates(operation, Type::Float);
1850
1851 spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
1852 std::vector<Id> operands;
1853 Id texture{};
1854
1855 if (!meta.aoffi.empty()) {
1856 mask = mask | spv::ImageOperandsMask::Offset;
1857 operands.push_back(GetOffsetCoordinates(operation));
1858 }
1859
1860 if (meta.sampler.is_shadow) {
1861 texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
1862 AsFloat(Visit(meta.depth_compare)), mask, operands);
1863 } else {
1864 u32 component_value = 0;
1865 if (meta.component) {
1866 const auto component = std::get_if<ImmediateNode>(&*meta.component);
1867 ASSERT_MSG(component, "Component is not an immediate value");
1868 component_value = component->GetValue();
1869 }
1870 texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
1871 Constant(t_uint, component_value), mask, operands);
1872 }
1873 return GetTextureElement(operation, texture, Type::Float);
1874 }
1875
1876 Expression TextureQueryDimensions(Operation operation) {
1877 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1878 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1879 UNIMPLEMENTED_IF(meta.depth_compare);
1880
1881 const auto image_id = GetTextureImage(operation);
1882 if (meta.element == 3) {
1883 return {OpImageQueryLevels(t_int, image_id), Type::Int};
1884 }
1885
1886 const Id lod = AsUint(Visit(operation[0]));
1887 const std::size_t coords_count = [&meta] {
1888 switch (const auto type = meta.sampler.type) {
1889 case Tegra::Shader::TextureType::Texture1D:
1890 return 1;
1891 case Tegra::Shader::TextureType::Texture2D:
1892 case Tegra::Shader::TextureType::TextureCube:
1893 return 2;
1894 case Tegra::Shader::TextureType::Texture3D:
1895 return 3;
1896 default:
1897 UNREACHABLE_MSG("Invalid texture type={}", type);
1898 return 2;
1899 }
1900 }();
1901
1902 if (meta.element >= coords_count) {
1903 return {v_float_zero, Type::Float};
1904 }
1905
1906 const std::array<Id, 3> types = {t_int, t_int2, t_int3};
1907 const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod);
1908 const Id size = OpCompositeExtract(t_int, sizes, meta.element);
1909 return {size, Type::Int};
1910 }
1911
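    // LOD query: OpImageQueryLod returns a float2 that is scaled by 256 and converted to
    // integers, presumably to match the fixed-point LOD format the guest instruction expects.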
1912 Expression TextureQueryLod(Operation operation) {
1913 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1914 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1915 UNIMPLEMENTED_IF(meta.depth_compare);
1916
1917 if (meta.element >= 2) {
1918 UNREACHABLE_MSG("Invalid element");
1919 return {v_float_zero, Type::Float};
1920 }
1921 const auto sampler_id = GetTextureSampler(operation);
1922
1923 const Id multiplier = Constant(t_float, 256.0f);
1924 const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier);
1925
1926 const Id coords = GetCoordinates(operation, Type::Float);
1927 Id size = OpImageQueryLod(t_float2, sampler_id, coords);
1928 size = OpFMul(t_float2, size, multipliers);
1929 size = OpConvertFToS(t_int2, size);
1930 return GetTextureElement(operation, size, Type::Int);
1931 }
1932
1933 Expression TexelFetch(Operation operation) {
1934 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1935 UNIMPLEMENTED_IF(meta.depth_compare);
1936
1937 const Id image = GetTextureImage(operation);
1938 const Id coords = GetCoordinates(operation, Type::Int);
1939
1940 spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
1941 std::vector<Id> operands;
1942 Id fetch;
1943
1944 if (meta.lod && !meta.sampler.is_buffer) {
1945 mask = mask | spv::ImageOperandsMask::Lod;
1946 operands.push_back(AsInt(Visit(meta.lod)));
1947 }
1948
1949 if (!meta.aoffi.empty()) {
1950 mask = mask | spv::ImageOperandsMask::Offset;
1951 operands.push_back(GetOffsetCoordinates(operation));
1952 }
1953
1954 fetch = OpImageFetch(t_float4, image, coords, mask, operands);
1955 return GetTextureElement(operation, fetch, Type::Float);
1956 }
1957
1958 Expression TextureGradient(Operation operation) {
1959 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1960 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1961
1962 const Id sampler = GetTextureSampler(operation);
1963 const Id coords = GetCoordinates(operation, Type::Float);
1964 const auto [dx, dy] = GetDerivatives(operation);
1965 const std::vector grad = {dx, dy};
1966
1967 static constexpr auto mask = spv::ImageOperandsMask::Grad;
1968 const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
1969 return GetTextureElement(operation, texture, Type::Float);
1970 }
1971
1972 Expression ImageLoad(Operation operation) {
1973 if (!device.IsFormatlessImageLoadSupported()) {
1974 return {v_float_zero, Type::Float};
1975 }
1976
1977 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1978
1979 const Id coords = GetCoordinates(operation, Type::Int);
1980 const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
1981
1982 return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
1983 }
1984
1985 Expression ImageStore(Operation operation) {
1986 const auto meta{std::get<MetaImage>(operation.GetMeta())};
1987 std::vector<Id> colors;
1988 for (const auto& value : meta.values) {
1989 colors.push_back(AsUint(Visit(value)));
1990 }
1991
1992 const Id coords = GetCoordinates(operation, Type::Int);
1993 const Id texel = OpCompositeConstruct(t_uint4, colors);
1994
1995 OpImageWrite(GetImage(operation), coords, texel, {});
1996 return {};
1997 }
1998
1999 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2000 Expression AtomicImage(Operation operation) {
2001 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2002 ASSERT(meta.values.size() == 1);
2003
2004 const Id coordinate = GetCoordinates(operation, Type::Int);
2005 const Id image = images.at(meta.image.index).image;
2006 const Id sample = v_uint_zero;
2007 const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
2008
2009 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
2010 const Id semantics = v_uint_zero;
2011 const Id value = AsUint(Visit(meta.values[0]));
2012 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
2013 }
2014
2015 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2016 Expression Atomic(Operation operation) {
2017 Id pointer;
2018 if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
2019 pointer = GetSharedMemoryPointer(*smem);
2020 } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
2021 pointer = GetGlobalMemoryPointer(*gmem);
2022 } else {
2023 UNREACHABLE();
2024 return {v_float_zero, Type::Float};
2025 }
2026 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
2027 const Id semantics = v_uint_zero;
2028 const Id value = AsUint(Visit(operation[1]));
2029
2030 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
2031 }
2032
2033 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2034 Expression Reduce(Operation operation) {
2035 Atomic<func>(operation);
2036 return {};
2037 }
2038
2039 Expression Branch(Operation operation) {
2040 const auto& target = std::get<ImmediateNode>(*operation[0]);
2041 OpStore(jmp_to, Constant(t_uint, target.GetValue()));
2042 OpBranch(continue_label);
2043 inside_branch = true;
2044 if (!conditional_branch_set) {
2045 AddLabel();
2046 }
2047 return {};
2048 }
2049
2050 Expression BranchIndirect(Operation operation) {
2051 const Id op_a = AsUint(Visit(operation[0]));
2052
2053 OpStore(jmp_to, op_a);
2054 OpBranch(continue_label);
2055 inside_branch = true;
2056 if (!conditional_branch_set) {
2057 AddLabel();
2058 }
2059 return {};
2060 }
2061
2062 Expression PushFlowStack(Operation operation) {
2063 const auto& target = std::get<ImmediateNode>(*operation[0]);
2064 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
2065 const Id current = OpLoad(t_uint, flow_stack_top);
2066 const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1));
2067 const Id access = OpAccessChain(t_func_uint, flow_stack, current);
2068
2069 OpStore(access, Constant(t_uint, target.GetValue()));
2070 OpStore(flow_stack_top, next);
2071 return {};
2072 }
2073
2074 Expression PopFlowStack(Operation operation) {
2075 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
2076 const Id current = OpLoad(t_uint, flow_stack_top);
2077 const Id previous = OpISub(t_uint, current, Constant(t_uint, 1));
2078 const Id access = OpAccessChain(t_func_uint, flow_stack, previous);
2079 const Id target = OpLoad(t_uint, access);
2080
2081 OpStore(flow_stack_top, previous);
2082 OpStore(jmp_to, target);
2083 OpBranch(continue_label);
2084 inside_branch = true;
2085 if (!conditional_branch_set) {
2086 AddLabel();
2087 }
2088 return {};
2089 }
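    // Example of the stack discipline above: an SSY pushing target 0x80 stores 0x80 at
    // flow_stack[top] and increments top; the matching pop reads it back from flow_stack[top - 1],
    // writes it to jmp_to and branches to continue_label so execution resumes at that target.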
2090
2091 Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
2092 using Compare = Maxwell::ComparisonOp;
2093 switch (compare_op) {
2094 case Compare::NeverOld:
2095 return v_false; // Never let the test pass
2096 case Compare::LessOld:
2097 return OpFOrdLessThan(t_bool, operand_1, operand_2);
2098 case Compare::EqualOld:
2099 return OpFOrdEqual(t_bool, operand_1, operand_2);
2100 case Compare::LessEqualOld:
2101 return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
2102 case Compare::GreaterOld:
2103 return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
2104 case Compare::NotEqualOld:
2105 return OpFOrdNotEqual(t_bool, operand_1, operand_2);
2106 case Compare::GreaterEqualOld:
2107 return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
2108 default:
2109 UNREACHABLE();
2110 return v_true;
2111 }
2112 }
2113
2114 void AlphaTest(Id pointer) {
2115 if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
2116 return;
2117 }
2118 const Id true_label = OpLabel();
2119 const Id discard_label = OpLabel();
2120 const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
2121 const Id alpha_value = OpLoad(t_float, pointer);
2122 const Id condition =
2123 MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
2124
2125 OpBranchConditional(condition, true_label, discard_label);
2126 AddLabel(discard_label);
2127 OpKill();
2128 AddLabel(true_label);
2129 }
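    // Example: with alpha_test_func == GreaterOld and alpha_test_ref == 0.5f the condition above
    // is OpFOrdGreaterThan(alpha, 0.5); fragments failing it branch to discard_label and are
    // killed, while the rest continue through true_label.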
2130
2131 void PreExit() {
2132 if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
2133 const u32 position_index = out_indices.position.value();
2134 const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
2135 const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
2136 Id depth = OpLoad(t_float, z_pointer);
2137 depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer));
2138 depth = OpFMul(t_float, depth, Constant(t_float, 0.5f));
2139 OpStore(z_pointer, depth);
2140 }
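        // The block above implements z' = (z + w) * 0.5, which after the perspective divide maps
        // z/w from [-1, 1] (OpenGL-style NDC) to [0, 1] as Vulkan requires.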
2141 if (stage == ShaderType::Fragment) {
2142 const auto SafeGetRegister = [this](u32 reg) {
2143 if (const auto it = registers.find(reg); it != registers.end()) {
2144 return OpLoad(t_float, it->second);
2145 }
2146 return v_float_zero;
2147 };
2148
2149 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
2150 "Sample mask write is unimplemented");
2151
2152 // Write the color outputs using the data in the shader registers; disabled
2153 // render targets/components are skipped in the register assignment.
2154 u32 current_reg = 0;
2155 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
2156 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
2157 for (u32 component = 0; component < 4; ++component) {
2158 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
2159 continue;
2160 }
2161 const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
2162 OpStore(pointer, SafeGetRegister(current_reg));
2163 if (rt == 0 && component == 3) {
2164 AlphaTest(pointer);
2165 }
2166 ++current_reg;
2167 }
2168 }
2169 if (header.ps.omap.depth) {
2170 // The depth output is always 2 registers after the last color output, and
2171 // current_reg already contains one past the last color register.
2172 OpStore(frag_depth, SafeGetRegister(current_reg + 1));
2173 }
2174 }
2175 }
2176
2177 Expression Exit(Operation operation) {
2178 PreExit();
2179 inside_branch = true;
2180 if (conditional_branch_set) {
2181 OpReturn();
2182 } else {
2183 const Id dummy = OpLabel();
2184 OpBranch(dummy);
2185 AddLabel(dummy);
2186 OpReturn();
2187 AddLabel();
2188 }
2189 return {};
2190 }
2191
2192 Expression Discard(Operation operation) {
2193 inside_branch = true;
2194 if (conditional_branch_set) {
2195 OpKill();
2196 } else {
2197 const Id dummy = OpLabel();
2198 OpBranch(dummy);
2199 AddLabel(dummy);
2200 OpKill();
2201 AddLabel();
2202 }
2203 return {};
2204 }
2205
2206 Expression EmitVertex(Operation) {
2207 OpEmitVertex();
2208 return {};
2209 }
2210
2211 Expression EndPrimitive(Operation operation) {
2212 OpEndPrimitive();
2213 return {};
2214 }
2215
2216 Expression InvocationId(Operation) {
2217 return {OpLoad(t_int, invocation_id), Type::Int};
2218 }
2219
2220 Expression YNegate(Operation) {
2221 LOG_WARNING(Render_Vulkan, "(STUBBED)");
2222 return {Constant(t_float, 1.0f), Type::Float};
2223 }
2224
2225 template <u32 element>
2226 Expression LocalInvocationId(Operation) {
2227 const Id id = OpLoad(t_uint3, local_invocation_id);
2228 return {OpCompositeExtract(t_uint, id, element), Type::Uint};
2229 }
2230
2231 template <u32 element>
2232 Expression WorkGroupId(Operation operation) {
2233 const Id id = OpLoad(t_uint3, workgroup_id);
2234 return {OpCompositeExtract(t_uint, id, element), Type::Uint};
2235 }
2236
2237 Expression BallotThread(Operation operation) {
2238 const Id predicate = AsBool(Visit(operation[0]));
2239 const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate);
2240
2241 if (!device.IsWarpSizePotentiallyBiggerThanGuest()) {
2242 // Devices whose warp size matches the guest can just return the first ballot word.
2243 return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint};
2244 }
2245
2246 // Other devices have to return the ballot word that is local to the current thread.
2247 // For instance, a device with a warp size of 64 returns the upper uint when the current
2248 // thread index is 38.
2249 const Id tid = OpLoad(t_uint, thread_id);
2250 const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5));
2251 return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint};
2252 }
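    // Worked example for the wide-warp path above: with a 64-thread host warp, thread 38 computes
    // thread_index = 38 >> 5 = 1 and extracts ballot component 1, the word holding the bits for
    // invocations 32..63.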
2253
2254 template <Id (Module::*func)(Id, Id)>
2255 Expression Vote(Operation operation) {
2256 // TODO(Rodrigo): Handle devices with different warp sizes
2257 const Id predicate = AsBool(Visit(operation[0]));
2258 return {(this->*func)(t_bool, predicate), Type::Bool};
2259 }
2260
2261 Expression ThreadId(Operation) {
2262 return {OpLoad(t_uint, thread_id), Type::Uint};
2263 }
2264
2265 template <std::size_t index>
2266 Expression ThreadMask(Operation) {
2267 // TODO(Rodrigo): Handle devices with different warp sizes
2268 const Id mask = thread_masks[index];
2269 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2270 }
2271
2272 Expression ShuffleIndexed(Operation operation) {
2273 const Id value = AsFloat(Visit(operation[0]));
2274 const Id index = AsUint(Visit(operation[1]));
2275 return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
2276 }
2277
2278 Expression Barrier(Operation) {
2279 if (!ir.IsDecompiled()) {
2280 LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
2281 return {};
2282 }
2283
2284 const auto scope = spv::Scope::Workgroup;
2285 const auto memory = spv::Scope::Workgroup;
2286 const auto semantics =
2287 spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
2288 OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
2289 Constant(t_uint, static_cast<u32>(memory)),
2290 Constant(t_uint, static_cast<u32>(semantics)));
2291 return {};
2292 }
2293
2294 template <spv::Scope scope>
2295 Expression MemoryBarrier(Operation) {
2296 const auto semantics =
2297 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2298 spv::MemorySemanticsMask::WorkgroupMemory |
2299 spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
2300
2301 OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
2302 Constant(t_uint, static_cast<u32>(semantics)));
2303 return {};
2304 }
2305
2306 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
2307 const Id id = OpVariable(type, storage);
2308 Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
2309 AddGlobalVariable(Name(id, std::move(name)));
2310 interfaces.push_back(id);
2311 return id;
2312 }
2313
2314 Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) {
2315 return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
2316 }
2317
2318 template <typename... Args>
2319 Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
2320 std::vector<Id> members;
2321 auto elements = {elements_...};
2322 for (const auto element : elements) {
2323 members.push_back(Constant(t_uint, element));
2324 }
2325
2326 return OpAccessChain(pointer_type, composite, members);
2327 }
2328
2329 Id As(Expression expr, Type wanted_type) {
2330 switch (wanted_type) {
2331 case Type::Bool:
2332 return AsBool(expr);
2333 case Type::Bool2:
2334 return AsBool2(expr);
2335 case Type::Float:
2336 return AsFloat(expr);
2337 case Type::Int:
2338 return AsInt(expr);
2339 case Type::Uint:
2340 return AsUint(expr);
2341 case Type::HalfFloat:
2342 return AsHalfFloat(expr);
2343 default:
2344 UNREACHABLE();
2345 return expr.id;
2346 }
2347 }
2348
2349 Id AsBool(Expression expr) {
2350 ASSERT(expr.type == Type::Bool);
2351 return expr.id;
2352 }
2353
2354 Id AsBool2(Expression expr) {
2355 ASSERT(expr.type == Type::Bool2);
2356 return expr.id;
2357 }
2358
2359 Id AsFloat(Expression expr) {
2360 switch (expr.type) {
2361 case Type::Float:
2362 return expr.id;
2363 case Type::Int:
2364 case Type::Uint:
2365 return OpBitcast(t_float, expr.id);
2366 case Type::HalfFloat:
2367 if (device.IsFloat16Supported()) {
2368 return OpBitcast(t_float, expr.id);
2369 }
2370 return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id));
2371 default:
2372 UNREACHABLE();
2373 return expr.id;
2374 }
2375 }
2376
2377 Id AsInt(Expression expr) {
2378 switch (expr.type) {
2379 case Type::Int:
2380 return expr.id;
2381 case Type::Float:
2382 case Type::Uint:
2383 return OpBitcast(t_int, expr.id);
2384 case Type::HalfFloat:
2385 if (device.IsFloat16Supported()) {
2386 return OpBitcast(t_int, expr.id);
2387 }
2388 return OpPackHalf2x16(t_int, expr.id);
2389 default:
2390 UNREACHABLE();
2391 return expr.id;
2392 }
2393 }
2394
2395 Id AsUint(Expression expr) {
2396 switch (expr.type) {
2397 case Type::Uint:
2398 return expr.id;
2399 case Type::Float:
2400 case Type::Int:
2401 return OpBitcast(t_uint, expr.id);
2402 case Type::HalfFloat:
2403 if (device.IsFloat16Supported()) {
2404 return OpBitcast(t_uint, expr.id);
2405 }
2406 return OpPackHalf2x16(t_uint, expr.id);
2407 default:
2408 UNREACHABLE();
2409 return expr.id;
2410 }
2411 }
2412
2413 Id AsHalfFloat(Expression expr) {
2414 switch (expr.type) {
2415 case Type::HalfFloat:
2416 return expr.id;
2417 case Type::Float:
2418 case Type::Int:
2419 case Type::Uint:
2420 if (device.IsFloat16Supported()) {
2421 return OpBitcast(t_half, expr.id);
2422 }
2423 return OpUnpackHalf2x16(t_half, AsUint(expr));
2424 default:
2425 UNREACHABLE();
2426 return expr.id;
2427 }
2428 }
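    // When the device lacks float16 support, t_half is a two-component 32-bit float vector (the
    // shape OpUnpackHalf2x16 requires), so HalfFloat values are packed into a single uint with
    // OpPackHalf2x16 whenever they are reinterpreted as 32-bit scalars in the casts above.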
2429
2430 Id GetHalfScalarFromFloat(Id value) {
2431 if (device.IsFloat16Supported()) {
2432 return OpFConvert(t_scalar_half, value);
2433 }
2434 return value;
2435 }
2436
2437 Id GetFloatFromHalfScalar(Id value) {
2438 if (device.IsFloat16Supported()) {
2439 return OpFConvert(t_float, value);
2440 }
2441 return value;
2442 }
2443
2444 AttributeType GetAttributeType(u32 location) const {
2445 if (stage != ShaderType::Vertex) {
2446 return {Type::Float, t_in_float, t_in_float4};
2447 }
2448 switch (specialization.attribute_types.at(location)) {
2449 case Maxwell::VertexAttribute::Type::SignedNorm:
2450 case Maxwell::VertexAttribute::Type::UnsignedNorm:
2451 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2452 case Maxwell::VertexAttribute::Type::SignedScaled:
2453 case Maxwell::VertexAttribute::Type::Float:
2454 return {Type::Float, t_in_float, t_in_float4};
2455 case Maxwell::VertexAttribute::Type::SignedInt:
2456 return {Type::Int, t_in_int, t_in_int4};
2457 case Maxwell::VertexAttribute::Type::UnsignedInt:
2458 return {Type::Uint, t_in_uint, t_in_uint4};
2459 default:
2460 UNREACHABLE();
2461 return {Type::Float, t_in_float, t_in_float4};
2462 }
2463 }
2464
2465 Id GetTypeDefinition(Type type) const {
2466 switch (type) {
2467 case Type::Bool:
2468 return t_bool;
2469 case Type::Bool2:
2470 return t_bool2;
2471 case Type::Float:
2472 return t_float;
2473 case Type::Int:
2474 return t_int;
2475 case Type::Uint:
2476 return t_uint;
2477 case Type::HalfFloat:
2478 return t_half;
2479 default:
2480 UNREACHABLE();
2481 return {};
2482 }
2483 }
2484
2485 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2486 switch (type) {
2487 case Type::Float:
2488 return {t_float, t_float2, t_float3, t_float4};
2489 case Type::Int:
2490 return {t_int, t_int2, t_int3, t_int4};
2491 case Type::Uint:
2492 return {t_uint, t_uint2, t_uint3, t_uint4};
2493 default:
2494 UNIMPLEMENTED();
2495 return {};
2496 }
2497 }
2498
2499 std::tuple<Id, Id> CreateFlowStack() {
2500 // TODO(Rodrigo): Figure out the actual depth of the flow stack; for now it seems unlikely
2501 // that shaders will use 20 nested SSYs and PBKs.
2502 constexpr u32 FLOW_STACK_SIZE = 20;
2503 constexpr auto storage_class = spv::StorageClass::Function;
2504
2505 const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
2506 const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
2507 ConstantNull(flow_stack_type));
2508 const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0));
2509 AddLocalVariable(stack);
2510 AddLocalVariable(top);
2511 return std::tie(stack, top);
2512 }
2513
2514 std::pair<Id, Id> GetFlowStack(Operation operation) {
2515 const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
2516 switch (stack_class) {
2517 case MetaStackClass::Ssy:
2518 return {ssy_flow_stack, ssy_flow_stack_top};
2519 case MetaStackClass::Pbk:
2520 return {pbk_flow_stack, pbk_flow_stack_top};
2521 }
2522 UNREACHABLE();
2523 return {};
2524 }
2525
2526 Id GetGlobalMemoryPointer(const GmemNode& gmem) {
2527 const Id real = AsUint(Visit(gmem.GetRealAddress()));
2528 const Id base = AsUint(Visit(gmem.GetBaseAddress()));
2529 const Id diff = OpISub(t_uint, real, base);
2530 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
2531 const Id buffer = global_buffers.at(gmem.GetDescriptor());
2532 return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
2533 }
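    // Example: a global access at base + 0x30 gives diff = 0x30 and offset = 0x30 >> 2 = 12,
    // i.e. the 13th uint of the SSBO's runtime array (member 0 of the struct), since global
    // memory is addressed here in 4-byte words.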
2534
2535 Id GetSharedMemoryPointer(const SmemNode& smem) {
2536 ASSERT(stage == ShaderType::Compute);
2537 Id address = AsUint(Visit(smem.GetAddress()));
2538 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
2539 return OpAccessChain(t_smem_uint, shared_memory, address);
2540 }
2541
2542 static constexpr std::array operation_decompilers = {
2543 &SPIRVDecompiler::Assign,
2544
2545 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
2546 Type::Float>,
2547
2548 &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
2549 &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
2550 &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
2551 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
2552 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
2553 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
2554 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
2555 &SPIRVDecompiler::FCastHalf<0>,
2556 &SPIRVDecompiler::FCastHalf<1>,
2557 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
2558 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
2559 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
2560 &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
2561 &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
2562 &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
2563 &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
2564 &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
2565 &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
2566 &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
2567 &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
2568 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
2569 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
2570 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
2571 &SPIRVDecompiler::FSwizzleAdd,
2572
2573 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
2574 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
2575 &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
2576 &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
2577 &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
2578 &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
2579 &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
2580
2581 &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
2582 &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
2583 &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
2584 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
2585 &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
2586 &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
2587 &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
2588 &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
2589 &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
2590 &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
2591 &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
2592 &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
2593 &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>,
2594
2595 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
2596 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
2597 &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
2598 &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
2599 &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
2600 &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
2601 &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
2602 &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
2603 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
2604 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
2605 &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
2606 &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
2607 &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
2608 &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
2609 &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
2610 &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
2611 &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
2612 &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>,
2613
2614 &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
2615 &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
2616 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
2617 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
2618 &SPIRVDecompiler::HNegate,
2619 &SPIRVDecompiler::HClamp,
2620 &SPIRVDecompiler::HCastFloat,
2621 &SPIRVDecompiler::HUnpack,
2622 &SPIRVDecompiler::HMergeF32,
2623 &SPIRVDecompiler::HMergeHN<0>,
2624 &SPIRVDecompiler::HMergeHN<1>,
2625 &SPIRVDecompiler::HPack2,
2626
2627 &SPIRVDecompiler::LogicalAssign,
2628 &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
2629 &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
2630 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
2631 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
2632 &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2,
2633 Type::Uint>,
2634 &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>,
2635
2636 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
2637 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
2638 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
2639 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
2640 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
2641 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
2642 &SPIRVDecompiler::LogicalFOrdered,
2643 &SPIRVDecompiler::LogicalFUnordered,
2644 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
2645 &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
2646 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
2647 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
2648 &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
2649 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
2650
2651 &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
2652 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
2653 &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
2654 &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
2655 &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
2656 &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
2657
2658 &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
2659 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
2660 &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
2661 &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
2662 &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
2663 &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
2664
2665 &SPIRVDecompiler::LogicalAddCarry,
2666
2667 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
2668 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
2669 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
2670 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
2671 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
2672 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
2673 // TODO(Rodrigo): Should these use the OpFUnord* variants?
2674 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
2675 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
2676 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
2677 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
2678 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
2679 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
2680
2681 &SPIRVDecompiler::Texture,
2682 &SPIRVDecompiler::TextureLod,
2683 &SPIRVDecompiler::TextureGather,
2684 &SPIRVDecompiler::TextureQueryDimensions,
2685 &SPIRVDecompiler::TextureQueryLod,
2686 &SPIRVDecompiler::TexelFetch,
2687 &SPIRVDecompiler::TextureGradient,
2688
2689 &SPIRVDecompiler::ImageLoad,
2690 &SPIRVDecompiler::ImageStore,
2691 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
2692 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
2693 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
2694 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
2695 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
2696
2697 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2698 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
2699 &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
2700 &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
2701 &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
2702 &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
2703 &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
2704
2705 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2706 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
2707 &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
2708 &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
2709 &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
2710 &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
2711 &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
2712
2713 &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
2714 &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
2715 &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
2716 &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
2717 &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
2718 &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
2719
2720 &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
2721 &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
2722 &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
2723 &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
2724 &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
2725 &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
2726
2727 &SPIRVDecompiler::Branch,
2728 &SPIRVDecompiler::BranchIndirect,
2729 &SPIRVDecompiler::PushFlowStack,
2730 &SPIRVDecompiler::PopFlowStack,
2731 &SPIRVDecompiler::Exit,
2732 &SPIRVDecompiler::Discard,
2733
2734 &SPIRVDecompiler::EmitVertex,
2735 &SPIRVDecompiler::EndPrimitive,
2736
2737 &SPIRVDecompiler::InvocationId,
2738 &SPIRVDecompiler::YNegate,
2739 &SPIRVDecompiler::LocalInvocationId<0>,
2740 &SPIRVDecompiler::LocalInvocationId<1>,
2741 &SPIRVDecompiler::LocalInvocationId<2>,
2742 &SPIRVDecompiler::WorkGroupId<0>,
2743 &SPIRVDecompiler::WorkGroupId<1>,
2744 &SPIRVDecompiler::WorkGroupId<2>,
2745
2746 &SPIRVDecompiler::BallotThread,
2747 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>,
2748 &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>,
2749 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2750
2751 &SPIRVDecompiler::ThreadId,
2752 &SPIRVDecompiler::ThreadMask<0>, // Eq
2753 &SPIRVDecompiler::ThreadMask<1>, // Ge
2754 &SPIRVDecompiler::ThreadMask<2>, // Gt
2755 &SPIRVDecompiler::ThreadMask<3>, // Le
2756 &SPIRVDecompiler::ThreadMask<4>, // Lt
2757 &SPIRVDecompiler::ShuffleIndexed,
2758
2759 &SPIRVDecompiler::Barrier,
2760 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2761 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2762 };
2763 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
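    // The table above is indexed with the IR's OperationCode, so entries must stay in declaration
    // order and cover every code; the static_assert catches any size mismatch when
    // OperationCode::Amount changes.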
2764
2765 const Device& device;
2766 const ShaderIR& ir;
2767 const ShaderType stage;
2768 const Tegra::Shader::Header header;
2769 const Registry& registry;
2770 const Specialization& specialization;
2771 std::unordered_map<u8, VaryingTFB> transform_feedback;
2772
2773 const Id t_void = Name(TypeVoid(), "void");
2774
2775 const Id t_bool = Name(TypeBool(), "bool");
2776 const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
2777
2778 const Id t_int = Name(TypeInt(32, true), "int");
2779 const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
2780 const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
2781 const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
2782
2783 const Id t_uint = Name(TypeInt(32, false), "uint");
2784 const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
2785 const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
2786 const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
2787
2788 const Id t_float = Name(TypeFloat(32), "float");
2789 const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
2790 const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
2791 const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
2792
2793 const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
2794 const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
2795
2796 const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
2797
2798 const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
2799 const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int");
2800 const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4");
2801 const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
2802 const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3");
2803 const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4");
2804 const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
2805 const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2");
2806 const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3");
2807 const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
2808
2809 const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int");
2810
2811 const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
2812 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
2813
2814 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
2815 const Id t_cbuf_std140 = Decorate(
2816 Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"),
2817 spv::Decoration::ArrayStride, 16U);
2818 const Id t_cbuf_scalar = Decorate(
2819 Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"),
2820 spv::Decoration::ArrayStride, 4U);
2821 const Id t_cbuf_std140_struct = MemberDecorate(
2822 Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2823 const Id t_cbuf_scalar_struct = MemberDecorate(
2824 Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2825 const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
2826 const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
2827
2828 Id t_smem_uint{};
2829
2830 const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
2831 const Id t_gmem_array =
2832 Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
2833 const Id t_gmem_struct = MemberDecorate(
2834 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2835 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
2836
2837 const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
2838
2839 const Id v_float_zero = Constant(t_float, 0.0f);
2840 const Id v_float_one = Constant(t_float, 1.0f);
2841 const Id v_uint_zero = Constant(t_uint, 0);
2842
2843 // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
2844 const Id v_varying_default =
2845 ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one);
2846
2847 const Id v_true = ConstantTrue(t_bool);
2848 const Id v_false = ConstantFalse(t_bool);
2849
2850 Id t_scalar_half{};
2851 Id t_half{};
2852
2853 Id out_vertex{};
2854 Id in_vertex{};
2855 std::map<u32, Id> registers;
2856 std::map<u32, Id> custom_variables;
2857 std::map<Tegra::Shader::Pred, Id> predicates;
2858 std::map<u32, Id> flow_variables;
2859 Id local_memory{};
2860 Id shared_memory{};
2861 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2862 std::map<Attribute::Index, Id> input_attributes;
2863 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2864 std::map<u32, Id> constant_buffers;
2865 std::map<GlobalMemoryBase, Id> global_buffers;
2866 std::map<u32, TexelBuffer> uniform_texels;
2867 std::map<u32, SampledImage> sampled_images;
2868 std::map<u32, StorageImage> images;
2869
2870 std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
2871 Id instance_index{};
2872 Id vertex_index{};
2873 Id base_instance{};
2874 Id base_vertex{};
2875 Id frag_depth{};
2876 Id frag_coord{};
2877 Id front_facing{};
2878 Id point_coord{};
2879 Id tess_level_outer{};
2880 Id tess_level_inner{};
2881 Id tess_coord{};
2882 Id invocation_id{};
2883 Id workgroup_id{};
2884 Id local_invocation_id{};
2885 Id thread_id{};
2886 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2887
2888 VertexIndices in_indices;
2889 VertexIndices out_indices;
2890
2891 std::vector<Id> interfaces;
2892
2893 Id jmp_to{};
2894 Id ssy_flow_stack_top{};
2895 Id pbk_flow_stack_top{};
2896 Id ssy_flow_stack{};
2897 Id pbk_flow_stack{};
2898 Id continue_label{};
2899 std::map<u32, Id> labels;
2900
2901 bool conditional_branch_set{};
2902 bool inside_branch{};
2903};
2904
2905class ExprDecompiler {
2906public:
2907 explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2908
2909 Id operator()(const ExprAnd& expr) {
2910 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2911 const Id op1 = Visit(expr.operand1);
2912 const Id op2 = Visit(expr.operand2);
2913 return decomp.OpLogicalAnd(type_def, op1, op2);
2914 }
2915
2916 Id operator()(const ExprOr& expr) {
2917 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2918 const Id op1 = Visit(expr.operand1);
2919 const Id op2 = Visit(expr.operand2);
2920 return decomp.OpLogicalOr(type_def, op1, op2);
2921 }
2922
2923 Id operator()(const ExprNot& expr) {
2924 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2925 const Id op1 = Visit(expr.operand1);
2926 return decomp.OpLogicalNot(type_def, op1);
2927 }
2928
2929 Id operator()(const ExprPredicate& expr) {
2930 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2931 return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred));
2932 }
2933
2934 Id operator()(const ExprCondCode& expr) {
2935 return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
2936 }
2937
2938 Id operator()(const ExprVar& expr) {
2939 return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index));
2940 }
2941
2942 Id operator()(const ExprBoolean& expr) {
2943 return expr.value ? decomp.v_true : decomp.v_false;
2944 }
2945
2946 Id operator()(const ExprGprEqual& expr) {
2947 const Id target = decomp.Constant(decomp.t_uint, expr.value);
2948 Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
2949 gpr = decomp.OpBitcast(decomp.t_uint, gpr);
2950 return decomp.OpIEqual(decomp.t_bool, gpr, target);
2951 }
2952
2953 Id Visit(const Expr& node) {
2954 return std::visit(*this, *node);
2955 }
2956
2957private:
2958 SPIRVDecompiler& decomp;
2959};
2960
2961class ASTDecompiler {
2962public:
2963 explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2964
2965 void operator()(const ASTProgram& ast) {
2966 ASTNode current = ast.nodes.GetFirst();
2967 while (current) {
2968 Visit(current);
2969 current = current->GetNext();
2970 }
2971 }
2972
2973 void operator()(const ASTIfThen& ast) {
2974 ExprDecompiler expr_parser{decomp};
2975 const Id condition = expr_parser.Visit(ast.condition);
2976 const Id then_label = decomp.OpLabel();
2977 const Id endif_label = decomp.OpLabel();
2978 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
2979 decomp.OpBranchConditional(condition, then_label, endif_label);
2980 decomp.AddLabel(then_label);
2981 ASTNode current = ast.nodes.GetFirst();
2982 while (current) {
2983 Visit(current);
2984 current = current->GetNext();
2985 }
2986 decomp.OpBranch(endif_label);
2987 decomp.AddLabel(endif_label);
2988 }
2989
2990 void operator()([[maybe_unused]] const ASTIfElse& ast) {
2991 UNREACHABLE();
2992 }
2993
2994 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2995 UNREACHABLE();
2996 }
2997
2998 void operator()(const ASTBlockDecoded& ast) {
2999 decomp.VisitBasicBlock(ast.nodes);
3000 }
3001
3002 void operator()(const ASTVarSet& ast) {
3003 ExprDecompiler expr_parser{decomp};
3004 const Id condition = expr_parser.Visit(ast.condition);
3005 decomp.OpStore(decomp.flow_variables.at(ast.index), condition);
3006 }
3007
3008 void operator()([[maybe_unused]] const ASTLabel& ast) {
3009 // Do nothing
3010 }
3011
3012 void operator()([[maybe_unused]] const ASTGoto& ast) {
3013 UNREACHABLE();
3014 }
3015
3016 void operator()(const ASTDoWhile& ast) {
3017 const Id loop_label = decomp.OpLabel();
3018 const Id endloop_label = decomp.OpLabel();
3019 const Id loop_start_block = decomp.OpLabel();
3020 const Id loop_continue_block = decomp.OpLabel();
3021 current_loop_exit = endloop_label;
3022 decomp.OpBranch(loop_label);
3023 decomp.AddLabel(loop_label);
3024 decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
3025 decomp.OpBranch(loop_start_block);
3026 decomp.AddLabel(loop_start_block);
3027 ASTNode current = ast.nodes.GetFirst();
3028 while (current) {
3029 Visit(current);
3030 current = current->GetNext();
3031 }
3032 decomp.OpBranch(loop_continue_block);
3033 decomp.AddLabel(loop_continue_block);
3034 ExprDecompiler expr_parser{decomp};
3035 const Id condition = expr_parser.Visit(ast.condition);
3036 decomp.OpBranchConditional(condition, loop_label, endloop_label);
3037 decomp.AddLabel(endloop_label);
3038 }
3039
3040 void operator()(const ASTReturn& ast) {
3041 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
3042 ExprDecompiler expr_parser{decomp};
3043 const Id condition = expr_parser.Visit(ast.condition);
3044 const Id then_label = decomp.OpLabel();
3045 const Id endif_label = decomp.OpLabel();
3046 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
3047 decomp.OpBranchConditional(condition, then_label, endif_label);
3048 decomp.AddLabel(then_label);
3049 if (ast.kills) {
3050 decomp.OpKill();
3051 } else {
3052 decomp.PreExit();
3053 decomp.OpReturn();
3054 }
3055 decomp.AddLabel(endif_label);
3056 } else {
3057 const Id next_block = decomp.OpLabel();
3058 decomp.OpBranch(next_block);
3059 decomp.AddLabel(next_block);
3060 if (ast.kills) {
3061 decomp.OpKill();
3062 } else {
3063 decomp.PreExit();
3064 decomp.OpReturn();
3065 }
3066 decomp.AddLabel(decomp.OpLabel());
3067 }
3068 }
3069
3070 void operator()(const ASTBreak& ast) {
3071 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
3072 ExprDecompiler expr_parser{decomp};
3073 const Id condition = expr_parser.Visit(ast.condition);
3074 const Id then_label = decomp.OpLabel();
3075 const Id endif_label = decomp.OpLabel();
3076 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
3077 decomp.OpBranchConditional(condition, then_label, endif_label);
3078 decomp.AddLabel(then_label);
3079 decomp.OpBranch(current_loop_exit);
3080 decomp.AddLabel(endif_label);
3081 } else {
3082 const Id next_block = decomp.OpLabel();
3083 decomp.OpBranch(next_block);
3084 decomp.AddLabel(next_block);
3085 decomp.OpBranch(current_loop_exit);
3086 decomp.AddLabel(decomp.OpLabel());
3087 }
3088 }
3089
3090 void Visit(const ASTNode& node) {
3091 std::visit(*this, *node->GetInnerData());
3092 }
3093
3094private:
3095 SPIRVDecompiler& decomp;
3096 Id current_loop_exit{};
3097};
3098
3099void SPIRVDecompiler::DecompileAST() {
3100 const u32 num_flow_variables = ir.GetASTNumVariables();
3101 for (u32 i = 0; i < num_flow_variables; i++) {
3102 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
3103 Name(id, fmt::format("flow_var_{}", i));
3104 flow_variables.emplace(i, AddGlobalVariable(id));
3105 }
3106
3107 DefinePrologue();
3108
3109 const ASTNode program = ir.GetASTProgram();
3110 ASTDecompiler decompiler{*this};
3111 decompiler.Visit(program);
3112
3113 const Id next_block = OpLabel();
3114 OpBranch(next_block);
3115 AddLabel(next_block);
3116}
3117
3118} // Anonymous namespace
3119
3120ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3121 ShaderEntries entries;
3122 for (const auto& cbuf : ir.GetConstantBuffers()) {
3123 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
3124 }
3125 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
3126 entries.global_buffers.emplace_back(GlobalBufferEntry{
3127 .cbuf_index = base.cbuf_index,
3128 .cbuf_offset = base.cbuf_offset,
3129 .is_written = usage.is_written,
3130 });
3131 }
3132 for (const auto& sampler : ir.GetSamplers()) {
3133 if (sampler.is_buffer) {
3134 entries.uniform_texels.emplace_back(sampler);
3135 } else {
3136 entries.samplers.emplace_back(sampler);
3137 }
3138 }
3139 for (const auto& image : ir.GetImages()) {
3140 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
3141 entries.storage_texels.emplace_back(image);
3142 } else {
3143 entries.images.emplace_back(image);
3144 }
3145 }
3146 for (const auto& attribute : ir.GetInputAttributes()) {
3147 if (IsGenericAttribute(attribute)) {
3148 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3149 }
3150 }
3151 for (const auto& buffer : entries.const_buffers) {
3152 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3153 }
3154 entries.clip_distances = ir.GetClipDistances();
3155 entries.shader_length = ir.GetLength();
3156 entries.uses_warps = ir.UsesWarps();
3157 return entries;
3158}
3159
3160std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
3161 ShaderType stage, const VideoCommon::Shader::Registry& registry,
3162 const Specialization& specialization) {
3163 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
3164}
3165
3166} // namespace Vulkan
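For reference, the two entry points kept at the bottom of this deleted file were meant to be used together; a minimal sketch of a caller (BuildShader and its signature are illustrative, not part of this commit; the types come from vk_shader_decompiler.h below):

    // Sketch only: combines the decompiled SPIR-V with its resource entries.
    SPIRVShader BuildShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                            Tegra::Engines::ShaderType stage,
                            const VideoCommon::Shader::Registry& registry,
                            const Specialization& specialization) {
        return SPIRVShader{
            .code = Decompile(device, ir, stage, registry, specialization),
            .entries = GenerateShaderEntries(ir),
        };
    }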
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
deleted file mode 100644
index 5d94132a5..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <set>
9#include <vector>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h"
14#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace Vulkan {
18
19class Device;
20
21using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
25using ImageEntry = VideoCommon::Shader::ImageEntry;
26
27constexpr u32 DESCRIPTOR_SET = 0;
28
29class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
30public:
31 explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
32 : ConstBuffer{entry_}, index{index_} {}
33
34 constexpr u32 GetIndex() const {
35 return index;
36 }
37
38private:
39 u32 index{};
40};
41
42struct GlobalBufferEntry {
43 u32 cbuf_index{};
44 u32 cbuf_offset{};
45 bool is_written{};
46};
47
48struct ShaderEntries {
49 u32 NumBindings() const {
50 return static_cast<u32>(const_buffers.size() + global_buffers.size() +
51 uniform_texels.size() + samplers.size() + storage_texels.size() +
52 images.size());
53 }
54
55 std::vector<ConstBufferEntry> const_buffers;
56 std::vector<GlobalBufferEntry> global_buffers;
57 std::vector<UniformTexelEntry> uniform_texels;
58 std::vector<SamplerEntry> samplers;
59 std::vector<StorageTexelEntry> storage_texels;
60 std::vector<ImageEntry> images;
61 std::set<u32> attributes;
62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
65 bool uses_warps{};
66};
67
68struct Specialization final {
69 u32 base_binding{};
70
71 // Compute specific
72 std::array<u32, 3> workgroup_size{};
73 u32 shared_memory_size{};
74
75 // Graphics specific
76 std::optional<float> point_size;
77 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
78 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
79 bool ndc_minus_one_to_one{};
80 bool early_fragment_tests{};
81 float alpha_test_ref{};
82 Maxwell::ComparisonOp alpha_test_func{};
83};
84// Old gcc versions don't consider this trivially copyable.
85// static_assert(std::is_trivially_copyable_v<Specialization>);
86
87struct SPIRVShader {
88 std::vector<u32> code;
89 ShaderEntries entries;
90};
91
92ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
93
94std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
95 Tegra::Engines::ShaderType stage,
96 const VideoCommon::Shader::Registry& registry,
97 const Specialization& specialization);
98
99} // namespace Vulkan
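GenerateShaderEntries records each used constant buffer as a bit in enabled_uniform_buffers (bit position = ConstBufferEntry::GetIndex()); a minimal sketch of reading that mask back from a ShaderEntries instance named entries, with the 32-buffer bound an assumption tied to the u32 width:

    // Sketch only: enumerate the constant buffers a shader actually references.
    for (u32 index = 0; index < 32; ++index) {
        if ((entries.enabled_uniform_buffers >> index) & 1U) {
            // constant buffer `index` needs a descriptor binding
        }
    }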
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 0412b5234..555b12ed7 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
         .flags = 0,
         .size = STREAM_BUFFER_SIZE,
         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
-                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 956f86845..e3b7dd61c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags;
 
 Flags MakeInvalidationFlags() {
     static constexpr int INVALIDATION_FLAGS[]{
         Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
-        StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
-        DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
+        StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
+        DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
+        VertexBuffers, VertexInput,
     };
     Flags flags{};
     for (const int flag : INVALIDATION_FLAGS) {
@@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() {
     for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
         flags[index] = true;
     }
+    for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
+        flags[index] = true;
+    }
+    for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
+        flags[index] = true;
+    }
     return flags;
 }
 
@@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) {
     table[OFF(stencil_back_func_mask)] = StencilProperties;
 }
 
+void SetupDirtyLineWidth(Tables& tables) {
+    tables[0][OFF(line_width_smooth)] = LineWidth;
+    tables[0][OFF(line_width_aliased)] = LineWidth;
+}
+
 void SetupDirtyCullMode(Tables& tables) {
     auto& table = tables[0];
     table[OFF(cull_face)] = CullMode;
@@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) {
     FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
 }
 
-void SetupDirtyInstanceDivisors(Tables& tables) {
-    static constexpr size_t divisor_offset = 3;
-    for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
-        tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
-        tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
-            InstanceDivisors;
+void SetupDirtyViewportSwizzles(Tables& tables) {
+    static constexpr size_t swizzle_offset = 6;
+    for (size_t index = 0; index < Regs::NumViewports; ++index) {
+        tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
+            ViewportSwizzles;
     }
 }
 
 void SetupDirtyVertexAttributes(Tables& tables) {
-    FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
+    for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
+        const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
+        FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
+    }
+    FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
 }
 
-void SetupDirtyViewportSwizzles(Tables& tables) {
-    static constexpr size_t swizzle_offset = 6;
-    for (size_t index = 0; index < Regs::NumViewports; ++index) {
-        tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
-            ViewportSwizzles;
+void SetupDirtyVertexBindings(Tables& tables) {
+    // Do NOT include stride here, it's implicit in VertexBuffer
+    static constexpr size_t divisor_offset = 3;
+    for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+        const u8 flag = static_cast<u8>(VertexBinding0 + i);
+        tables[0][OFF(instanced_arrays) + i] = VertexInput;
+        tables[1][OFF(instanced_arrays) + i] = flag;
+        tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput;
+        tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag;
     }
 }
 } // Anonymous namespace
 
 StateTracker::StateTracker(Tegra::GPU& gpu)
     : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
-    auto& tables = gpu.Maxwell3D().dirty.tables;
+    auto& tables{gpu.Maxwell3D().dirty.tables};
     SetupDirtyFlags(tables);
     SetupDirtyViewports(tables);
     SetupDirtyScissors(tables);
@@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
     SetupDirtyBlendConstants(tables);
     SetupDirtyDepthBounds(tables);
     SetupDirtyStencilProperties(tables);
+    SetupDirtyLineWidth(tables);
     SetupDirtyCullMode(tables);
     SetupDirtyDepthBoundsEnable(tables);
     SetupDirtyDepthTestEnable(tables);
@@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
175 SetupDirtyStencilOp(tables); 195 SetupDirtyStencilOp(tables);
176 SetupDirtyStencilTestEnable(tables); 196 SetupDirtyStencilTestEnable(tables);
177 SetupDirtyBlending(tables); 197 SetupDirtyBlending(tables);
178 SetupDirtyInstanceDivisors(tables);
179 SetupDirtyVertexAttributes(tables);
180 SetupDirtyViewportSwizzles(tables); 198 SetupDirtyViewportSwizzles(tables);
199 SetupDirtyVertexAttributes(tables);
200 SetupDirtyVertexBindings(tables);
181} 201}
182 202
183} // namespace Vulkan 203} // namespace Vulkan
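The vertex state above is now tracked at two granularities: tables[0] routes a register write to the coarse VertexInput flag, while tables[1] routes the same write to a per-binding flag (VertexBinding0 + i) or a per-attribute flag (VertexAttribute0 + i). A minimal sketch of how a consumer could exploit that split follows; the Flags bitset type name and the rebuild step are assumptions, only the flag names come from this patch.

// Sketch only: test the cheap coarse flag first, then scan the per-binding flags.
void UpdateVertexBindings(Tegra::Engines::Maxwell3D::DirtyState::Flags& flags) {
    if (!flags[Vulkan::Dirty::VertexInput]) {
        return;
    }
    flags[Vulkan::Dirty::VertexInput] = false;
    for (size_t index = 0; index < 32; ++index) { // 32 matches VertexBinding31 above
        if (!flags[Vulkan::Dirty::VertexBinding0 + index]) {
            continue;
        }
        flags[Vulkan::Dirty::VertexBinding0 + index] = false;
        // Rebuild the binding description for this index (placeholder step).
    }
}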
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 84e918a71..5f78f6950 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -19,12 +19,19 @@ namespace Dirty {
19enum : u8 { 19enum : u8 {
20 First = VideoCommon::Dirty::LastCommonEntry, 20 First = VideoCommon::Dirty::LastCommonEntry,
21 21
22 VertexInput,
23 VertexAttribute0,
24 VertexAttribute31 = VertexAttribute0 + 31,
25 VertexBinding0,
26 VertexBinding31 = VertexBinding0 + 31,
27
22 Viewports, 28 Viewports,
23 Scissors, 29 Scissors,
24 DepthBias, 30 DepthBias,
25 BlendConstants, 31 BlendConstants,
26 DepthBounds, 32 DepthBounds,
27 StencilProperties, 33 StencilProperties,
34 LineWidth,
28 35
29 CullMode, 36 CullMode,
30 DepthBoundsEnable, 37 DepthBoundsEnable,
@@ -36,11 +43,9 @@ enum : u8 {
36 StencilTestEnable, 43 StencilTestEnable,
37 44
38 Blending, 45 Blending,
39 InstanceDivisors,
40 VertexAttributes,
41 ViewportSwizzles, 46 ViewportSwizzles,
42 47
43 Last 48 Last,
44}; 49};
45static_assert(Last <= std::numeric_limits<u8>::max()); 50static_assert(Last <= std::numeric_limits<u8>::max());
46 51
@@ -89,6 +94,10 @@ public:
89 return Exchange(Dirty::StencilProperties, false); 94 return Exchange(Dirty::StencilProperties, false);
90 } 95 }
91 96
97 bool TouchLineWidth() const {
98 return Exchange(Dirty::LineWidth, false);
99 }
100
92 bool TouchCullMode() { 101 bool TouchCullMode() {
93 return Exchange(Dirty::CullMode, false); 102 return Exchange(Dirty::CullMode, false);
94 } 103 }
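Each Touch* helper clears its flag as it reads it, so dynamic state is only re-recorded when the corresponding Maxwell registers were written since the last draw. A minimal sketch of that pattern for the new LineWidth flag; the Maxwell alias and the cmdbuf.SetLineWidth() wrapper call are assumptions, and the real consumer lives in the rasterizer outside this excerpt.

// Sketch only: re-record vkCmdSetLineWidth only when the registers changed.
void UpdateLineWidth(StateTracker& state_tracker, const Maxwell& regs,
                     vk::CommandBuffer cmdbuf) {
    if (!state_tracker.TouchLineWidth()) {
        return; // Registers untouched since the last draw; keep current state.
    }
    // Choosing between the smooth and aliased registers is left out of this
    // sketch; both offsets mark the same LineWidth flag in the tables above.
    cmdbuf.SetLineWidth(regs.line_width_smooth);
}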
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index dfd5c65ba..d990eefba 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul
65VKSwapchain::~VKSwapchain() = default; 65VKSwapchain::~VKSwapchain() = default;
66 66
67void VKSwapchain::Create(u32 width, u32 height, bool srgb) { 67void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
68 is_outdated = false;
69 is_suboptimal = false;
70
68 const auto physical_device = device.GetPhysical(); 71 const auto physical_device = device.GetPhysical();
69 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; 72 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
70 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { 73 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
@@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
82 resource_ticks.resize(image_count); 85 resource_ticks.resize(image_count);
83} 86}
84 87
85bool VKSwapchain::AcquireNextImage() { 88void VKSwapchain::AcquireNextImage() {
86 const VkResult result = 89 const VkResult result = device.GetLogical().AcquireNextImageKHR(
87 device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), 90 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
88 *present_semaphores[frame_index], {}, &image_index); 91 VK_NULL_HANDLE, &image_index);
89 92 switch (result) {
93 case VK_SUCCESS:
94 break;
95 case VK_SUBOPTIMAL_KHR:
96 is_suboptimal = true;
97 break;
98 case VK_ERROR_OUT_OF_DATE_KHR:
99 is_outdated = true;
100 break;
101 default:
102 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
103 break;
104 }
90 scheduler.Wait(resource_ticks[image_index]); 105 scheduler.Wait(resource_ticks[image_index]);
91 return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; 106 resource_ticks[image_index] = scheduler.CurrentTick();
92} 107}
93 108
94bool VKSwapchain::Present(VkSemaphore render_semaphore) { 109void VKSwapchain::Present(VkSemaphore render_semaphore) {
95 const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; 110 const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
96 const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; 111 const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
97 const auto present_queue{device.GetPresentQueue()}; 112 const auto present_queue{device.GetPresentQueue()};
98 bool recreated = false;
99
100 const VkPresentInfoKHR present_info{ 113 const VkPresentInfoKHR present_info{
101 .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, 114 .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
102 .pNext = nullptr, 115 .pNext = nullptr,
@@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
107 .pImageIndices = &image_index, 120 .pImageIndices = &image_index,
108 .pResults = nullptr, 121 .pResults = nullptr,
109 }; 122 };
110
111 switch (const VkResult result = present_queue.Present(present_info)) { 123 switch (const VkResult result = present_queue.Present(present_info)) {
112 case VK_SUCCESS: 124 case VK_SUCCESS:
113 break; 125 break;
@@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
115 LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); 127 LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
116 break; 128 break;
117 case VK_ERROR_OUT_OF_DATE_KHR: 129 case VK_ERROR_OUT_OF_DATE_KHR:
118 if (current_width > 0 && current_height > 0) { 130 is_outdated = true;
119 Create(current_width, current_height, current_srgb);
120 recreated = true;
121 }
122 break; 131 break;
123 default: 132 default:
124 LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); 133 LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
125 break; 134 break;
126 } 135 }
127 136 ++frame_index;
128 resource_ticks[image_index] = scheduler.CurrentTick(); 137 if (frame_index >= image_count) {
129 frame_index = (frame_index + 1) % static_cast<u32>(image_count); 138 frame_index = 0;
130 return recreated; 139 }
131}
132
133bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
134 // TODO(Rodrigo): Handle framebuffer pixel format changes
135 return framebuffer.width != current_width || framebuffer.height != current_height;
136} 140}
137 141
138void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, 142void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
@@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
148 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { 152 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
149 requested_image_count = capabilities.maxImageCount; 153 requested_image_count = capabilities.maxImageCount;
150 } 154 }
151
152 VkSwapchainCreateInfoKHR swapchain_ci{ 155 VkSwapchainCreateInfoKHR swapchain_ci{
153 .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, 156 .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
154 .pNext = nullptr, 157 .pNext = nullptr,
@@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
169 .clipped = VK_FALSE, 172 .clipped = VK_FALSE,
170 .oldSwapchain = nullptr, 173 .oldSwapchain = nullptr,
171 }; 174 };
172
173 const u32 graphics_family{device.GetGraphicsFamily()}; 175 const u32 graphics_family{device.GetGraphicsFamily()};
174 const u32 present_family{device.GetPresentFamily()}; 176 const u32 present_family{device.GetPresentFamily()};
175 const std::array<u32, 2> queue_indices{graphics_family, present_family}; 177 const std::array<u32, 2> queue_indices{graphics_family, present_family};
@@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
178 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); 180 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
179 swapchain_ci.pQueueFamilyIndices = queue_indices.data(); 181 swapchain_ci.pQueueFamilyIndices = queue_indices.data();
180 } 182 }
181
182 // Request the size again to reduce the possibility of a TOCTOU race condition. 183 // Request the size again to reduce the possibility of a TOCTOU race condition.
183 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); 184 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
184 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); 185 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
186 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); 187 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
187 188
188 extent = swapchain_ci.imageExtent; 189 extent = swapchain_ci.imageExtent;
189 current_width = extent.width;
190 current_height = extent.height;
191 current_srgb = srgb; 190 current_srgb = srgb;
192 191
193 images = swapchain.GetImages(); 192 images = swapchain.GetImages();
@@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
197 196
198void VKSwapchain::CreateSemaphores() { 197void VKSwapchain::CreateSemaphores() {
199 present_semaphores.resize(image_count); 198 present_semaphores.resize(image_count);
200 std::generate(present_semaphores.begin(), present_semaphores.end(), 199 std::ranges::generate(present_semaphores,
201 [this] { return device.GetLogical().CreateSemaphore(); }); 200 [this] { return device.GetLogical().CreateSemaphore(); });
202} 201}
203 202
204void VKSwapchain::CreateImageViews() { 203void VKSwapchain::CreateImageViews() {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index adc8d27cf..35c2cdc14 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -28,14 +28,25 @@ public:
28 void Create(u32 width, u32 height, bool srgb); 28 void Create(u32 width, u32 height, bool srgb);
29 29
30 /// Acquires the next image in the swapchain, waits as needed. 30 /// Acquires the next image in the swapchain, waits as needed.
31 bool AcquireNextImage(); 31 void AcquireNextImage();
32 32
33 /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be 33 /// Presents the rendered image to the swapchain.
34 /// recreated. Takes responsibility for the ownership of fence. 34 void Present(VkSemaphore render_semaphore);
35 bool Present(VkSemaphore render_semaphore);
36 35
37 /// Returns true when the framebuffer layout has changed. 36 /// Returns true when the color space has changed.
38 bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; 37 bool HasColorSpaceChanged(bool is_srgb) const {
38 return current_srgb != is_srgb;
39 }
40
41 /// Returns true when the swapchain is outdated.
42 bool IsOutDated() const {
43 return is_outdated;
44 }
45
46 /// Returns true when the swapchain is suboptimal.
47 bool IsSubOptimal() const {
48 return is_suboptimal;
49 }
39 50
40 VkExtent2D GetSize() const { 51 VkExtent2D GetSize() const {
41 return extent; 52 return extent;
@@ -61,10 +72,6 @@ public:
61 return image_format; 72 return image_format;
62 } 73 }
63 74
64 bool GetSrgbState() const {
65 return current_srgb;
66 }
67
68private: 75private:
69 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, 76 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
70 bool srgb); 77 bool srgb);
@@ -92,9 +99,9 @@ private:
92 VkFormat image_format{}; 99 VkFormat image_format{};
93 VkExtent2D extent{}; 100 VkExtent2D extent{};
94 101
95 u32 current_width{};
96 u32 current_height{};
97 bool current_srgb{}; 102 bool current_srgb{};
103 bool is_outdated{};
104 bool is_suboptimal{};
98}; 105};
99 106
100} // namespace Vulkan 107} // namespace Vulkan
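Swapchain recreation no longer hangs off Present()'s return value; the caller is expected to poll the new predicates and call Create() itself, which also clears is_outdated and is_suboptimal. A minimal sketch of that caller-side flow; RenderToSwapchain() is a hypothetical stand-in for whatever signals the render semaphore, and the real call site is outside this excerpt.

// Sketch only: recreate on demand, then acquire, render and present.
VkSemaphore RenderToSwapchain(); // Hypothetical: records the frame, returns its signal semaphore.

void DrawAndPresent(VKSwapchain& swapchain, u32 width, u32 height, bool wants_srgb) {
    if (swapchain.IsOutDated() || swapchain.IsSubOptimal() ||
        swapchain.HasColorSpaceChanged(wants_srgb)) {
        swapchain.Create(width, height, wants_srgb); // Resets the outdated/suboptimal flags.
    }
    swapchain.AcquireNextImage();
    const VkSemaphore render_semaphore = RenderToSwapchain();
    swapchain.Present(render_semaphore);
}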
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 88ccf96f5..8e029bcb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,6 +15,7 @@
15#include "video_core/renderer_vulkan/maxwell_to_vk.h" 15#include "video_core/renderer_vulkan/maxwell_to_vk.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
17#include "video_core/renderer_vulkan/vk_rasterizer.h" 17#include "video_core/renderer_vulkan/vk_rasterizer.h"
18#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
18#include "video_core/renderer_vulkan/vk_scheduler.h" 19#include "video_core/renderer_vulkan/vk_scheduler.h"
19#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 20#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
20#include "video_core/renderer_vulkan/vk_texture_cache.h" 21#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
34using VideoCore::Surface::IsPixelFormatASTC; 35using VideoCore::Surface::IsPixelFormatASTC;
35 36
36namespace { 37namespace {
37
38constexpr std::array ATTACHMENT_REFERENCES{
39 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
42 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
43 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
44 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
45 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
46 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
47 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
48};
49
50constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { 38constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
51 if (color == std::array<float, 4>{0, 0, 0, 0}) { 39 if (color == std::array<float, 4>{0, 0, 0, 0}) {
52 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; 40 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
174 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); 162 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
175} 163}
176 164
177[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
178 if (info.type != ImageType::Buffer) {
179 return vk::Buffer{};
180 }
181 const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
182 return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
183 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
184 .pNext = nullptr,
185 .flags = 0,
186 .size = info.size.width * bytes_per_block,
187 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
188 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
189 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
190 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
191 .queueFamilyIndexCount = 0,
192 .pQueueFamilyIndices = nullptr,
193 });
194}
195
196[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { 165[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
197 switch (VideoCore::Surface::GetFormatType(format)) { 166 switch (VideoCore::Surface::GetFormatType(format)) {
198 case VideoCore::Surface::SurfaceType::ColorTexture: 167 case VideoCore::Surface::SurfaceType::ColorTexture:
@@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
226 } 195 }
227} 196}
228 197
229[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
230 const ImageView* image_view) {
231 using MaxwellToVK::SurfaceFormat;
232 const PixelFormat pixel_format = image_view->format;
233 return VkAttachmentDescription{
234 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
235 .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
236 .samples = image_view->Samples(),
237 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
238 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
239 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
240 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
241 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
242 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
243 };
244}
245
246[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { 198[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
247 switch (swizzle) { 199 switch (swizzle) {
248 case SwizzleSource::Zero: 200 case SwizzleSource::Zero:
@@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
263 return VK_COMPONENT_SWIZZLE_ZERO; 215 return VK_COMPONENT_SWIZZLE_ZERO;
264} 216}
265 217
218[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
219 switch (type) {
220 case Shader::TextureType::Color1D:
221 return VK_IMAGE_VIEW_TYPE_1D;
222 case Shader::TextureType::Color2D:
223 return VK_IMAGE_VIEW_TYPE_2D;
224 case Shader::TextureType::ColorCube:
225 return VK_IMAGE_VIEW_TYPE_CUBE;
226 case Shader::TextureType::Color3D:
227 return VK_IMAGE_VIEW_TYPE_3D;
228 case Shader::TextureType::ColorArray1D:
229 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
230 case Shader::TextureType::ColorArray2D:
231 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
232 case Shader::TextureType::ColorArrayCube:
233 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
234 case Shader::TextureType::Buffer:
235 UNREACHABLE_MSG("Texture buffers can't be image views");
236 return VK_IMAGE_VIEW_TYPE_1D;
237 }
238 UNREACHABLE_MSG("Invalid image view type={}", type);
239 return VK_IMAGE_VIEW_TYPE_2D;
240}
241
266[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { 242[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
267 switch (type) { 243 switch (type) {
268 case VideoCommon::ImageViewType::e1D: 244 case VideoCommon::ImageViewType::e1D:
@@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
280 case VideoCommon::ImageViewType::CubeArray: 256 case VideoCommon::ImageViewType::CubeArray:
281 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; 257 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
282 case VideoCommon::ImageViewType::Rect: 258 case VideoCommon::ImageViewType::Rect:
283 LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); 259 UNIMPLEMENTED_MSG("Rect image view");
284 return VK_IMAGE_VIEW_TYPE_2D; 260 return VK_IMAGE_VIEW_TYPE_2D;
285 case VideoCommon::ImageViewType::Buffer: 261 case VideoCommon::ImageViewType::Buffer:
286 UNREACHABLE_MSG("Texture buffers can't be image views"); 262 UNREACHABLE_MSG("Texture buffers can't be image views");
@@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
327 }; 303 };
328} 304}
329 305
330[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 306[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
331 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 307 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
332 std::vector<VkBufferCopy> result(copies.size()); 308 std::vector<VkBufferCopy> result(copies.size());
333 std::ranges::transform( 309 std::ranges::transform(
@@ -587,6 +563,28 @@ struct RangedBarrierRange {
587 } 563 }
588}; 564};
589 565
566[[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
567 switch (format) {
568 case Shader::ImageFormat::Typeless:
569 break;
570 case Shader::ImageFormat::R8_SINT:
571 return VK_FORMAT_R8_SINT;
572 case Shader::ImageFormat::R8_UINT:
573 return VK_FORMAT_R8_UINT;
574 case Shader::ImageFormat::R16_UINT:
575 return VK_FORMAT_R16_UINT;
576 case Shader::ImageFormat::R16_SINT:
577 return VK_FORMAT_R16_SINT;
578 case Shader::ImageFormat::R32_UINT:
579 return VK_FORMAT_R32_UINT;
580 case Shader::ImageFormat::R32G32_UINT:
581 return VK_FORMAT_R32G32_UINT;
582 case Shader::ImageFormat::R32G32B32A32_UINT:
583 return VK_FORMAT_R32G32B32A32_UINT;
584 }
585 UNREACHABLE_MSG("Invalid image format={}", format);
586 return VK_FORMAT_R32_UINT;
587}
590} // Anonymous namespace 588} // Anonymous namespace
591 589
592void TextureCacheRuntime::Finish() { 590void TextureCacheRuntime::Finish() {
@@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
625 return; 623 return;
626 } 624 }
627 } 625 }
628 ASSERT(src.ImageFormat() == dst.ImageFormat()); 626 ASSERT(src.format == dst.format);
629 ASSERT(!(is_dst_msaa && !is_src_msaa)); 627 ASSERT(!(is_dst_msaa && !is_src_msaa));
630 ASSERT(operation == Fermi2D::Operation::SrcCopy); 628 ASSERT(operation == Fermi2D::Operation::SrcCopy);
631 629
@@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
842Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, 840Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
843 VAddr cpu_addr_) 841 VAddr cpu_addr_)
844 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, 842 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
845 image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), 843 image(MakeImage(runtime.device, info)),
844 commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
846 aspect_mask(ImageAspectMask(info.format)) { 845 aspect_mask(ImageAspectMask(info.format)) {
847 if (image) {
848 commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
849 } else {
850 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
851 }
852 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { 846 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
853 if (Settings::values.accelerate_astc.GetValue()) { 847 if (Settings::values.accelerate_astc.GetValue()) {
854 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 848 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
@@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
857 } 851 }
858 } 852 }
859 if (runtime.device.HasDebuggingToolAttached()) { 853 if (runtime.device.HasDebuggingToolAttached()) {
860 if (image) { 854 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
861 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
862 } else {
863 buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
864 }
865 } 855 }
866 static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ 856 static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
867 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, 857 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
@@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
913 }); 903 });
914} 904}
915 905
916void Image::UploadMemory(const StagingBufferRef& map,
917 std::span<const VideoCommon::BufferCopy> copies) {
918 // TODO: Move this to another API
919 scheduler->RequestOutsideRenderPassOperationContext();
920 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
921 const VkBuffer src_buffer = map.buffer;
922 const VkBuffer dst_buffer = *buffer;
923 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
924 // TODO: Barriers
925 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
926 });
927}
928
929void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { 906void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
930 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); 907 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
931 scheduler->RequestOutsideRenderPassOperationContext(); 908 scheduler->RequestOutsideRenderPassOperationContext();
@@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
984ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, 961ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
985 ImageId image_id_, Image& image) 962 ImageId image_id_, Image& image)
986 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, 963 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
987 image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( 964 image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} {
988 image.info.num_samples)} { 965 using Shader::TextureType;
966
989 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); 967 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
990 std::array<SwizzleSource, 4> swizzle{ 968 std::array<SwizzleSource, 4> swizzle{
991 SwizzleSource::R, 969 SwizzleSource::R,
@@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1023 }, 1001 },
1024 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), 1002 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
1025 }; 1003 };
1026 const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { 1004 const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
1027 VkImageViewCreateInfo ci{create_info}; 1005 VkImageViewCreateInfo ci{create_info};
1028 ci.viewType = ImageViewType(view_type); 1006 ci.viewType = ImageViewType(tex_type);
1029 if (num_layers) { 1007 if (num_layers) {
1030 ci.subresourceRange.layerCount = *num_layers; 1008 ci.subresourceRange.layerCount = *num_layers;
1031 } 1009 }
1032 vk::ImageView handle = device->GetLogical().CreateImageView(ci); 1010 vk::ImageView handle = device->GetLogical().CreateImageView(ci);
1033 if (device->HasDebuggingToolAttached()) { 1011 if (device->HasDebuggingToolAttached()) {
1034 handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); 1012 handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
1035 } 1013 }
1036 image_views[static_cast<size_t>(view_type)] = std::move(handle); 1014 image_views[static_cast<size_t>(tex_type)] = std::move(handle);
1037 }; 1015 };
1038 switch (info.type) { 1016 switch (info.type) {
1039 case VideoCommon::ImageViewType::e1D: 1017 case VideoCommon::ImageViewType::e1D:
1040 case VideoCommon::ImageViewType::e1DArray: 1018 case VideoCommon::ImageViewType::e1DArray:
1041 create(VideoCommon::ImageViewType::e1D, 1); 1019 create(TextureType::Color1D, 1);
1042 create(VideoCommon::ImageViewType::e1DArray, std::nullopt); 1020 create(TextureType::ColorArray1D, std::nullopt);
1043 render_target = Handle(VideoCommon::ImageViewType::e1DArray); 1021 render_target = Handle(TextureType::ColorArray1D);
1044 break; 1022 break;
1045 case VideoCommon::ImageViewType::e2D: 1023 case VideoCommon::ImageViewType::e2D:
1046 case VideoCommon::ImageViewType::e2DArray: 1024 case VideoCommon::ImageViewType::e2DArray:
1047 create(VideoCommon::ImageViewType::e2D, 1); 1025 create(TextureType::Color2D, 1);
1048 create(VideoCommon::ImageViewType::e2DArray, std::nullopt); 1026 create(TextureType::ColorArray2D, std::nullopt);
1049 render_target = Handle(VideoCommon::ImageViewType::e2DArray); 1027 render_target = Handle(Shader::TextureType::ColorArray2D);
1050 break; 1028 break;
1051 case VideoCommon::ImageViewType::e3D: 1029 case VideoCommon::ImageViewType::e3D:
1052 create(VideoCommon::ImageViewType::e3D, std::nullopt); 1030 create(TextureType::Color3D, std::nullopt);
1053 render_target = Handle(VideoCommon::ImageViewType::e3D); 1031 render_target = Handle(Shader::TextureType::Color3D);
1054 break; 1032 break;
1055 case VideoCommon::ImageViewType::Cube: 1033 case VideoCommon::ImageViewType::Cube:
1056 case VideoCommon::ImageViewType::CubeArray: 1034 case VideoCommon::ImageViewType::CubeArray:
1057 create(VideoCommon::ImageViewType::Cube, 6); 1035 create(TextureType::ColorCube, 6);
1058 create(VideoCommon::ImageViewType::CubeArray, std::nullopt); 1036 create(TextureType::ColorArrayCube, std::nullopt);
1059 break; 1037 break;
1060 case VideoCommon::ImageViewType::Rect: 1038 case VideoCommon::ImageViewType::Rect:
1061 UNIMPLEMENTED(); 1039 UNIMPLEMENTED();
1062 break; 1040 break;
1063 case VideoCommon::ImageViewType::Buffer: 1041 case VideoCommon::ImageViewType::Buffer:
1064 buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ 1042 UNREACHABLE();
1065 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1066 .pNext = nullptr,
1067 .flags = 0,
1068 .buffer = image.Buffer(),
1069 .format = format_info.format,
1070 .offset = 0, // TODO: Redesign buffer cache to support this
1071 .range = image.guest_size_bytes,
1072 });
1073 break; 1043 break;
1074 } 1044 }
1075} 1045}
1076 1046
1047ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1048 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
1049 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
1050 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
1051
1077ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) 1052ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
1078 : VideoCommon::ImageViewBase{params} {} 1053 : VideoCommon::ImageViewBase{params} {}
1079 1054
@@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() {
1081 if (depth_view) { 1056 if (depth_view) {
1082 return *depth_view; 1057 return *depth_view;
1083 } 1058 }
1084 depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); 1059 const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
1060 depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT);
1085 return *depth_view; 1061 return *depth_view;
1086} 1062}
1087 1063
@@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() {
1089 if (stencil_view) { 1065 if (stencil_view) {
1090 return *stencil_view; 1066 return *stencil_view;
1091 } 1067 }
1092 stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); 1068 const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
1069 stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT);
1093 return *stencil_view; 1070 return *stencil_view;
1094} 1071}
1095 1072
1096vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { 1073VkImageView ImageView::StorageView(Shader::TextureType texture_type,
1074 Shader::ImageFormat image_format) {
1075 if (image_format == Shader::ImageFormat::Typeless) {
1076 return Handle(texture_type);
1077 }
1078 const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
1079 image_format == Shader::ImageFormat::R16_SINT};
1080 if (!storage_views) {
1081 storage_views = std::make_unique<StorageViews>();
1082 }
1083 auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
1084 auto& view{views[static_cast<size_t>(texture_type)]};
1085 if (view) {
1086 return *view;
1087 }
1088 view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
1089 return *view;
1090}
1091
1092vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
1097 return device->GetLogical().CreateImageView({ 1093 return device->GetLogical().CreateImageView({
1098 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 1094 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1099 .pNext = nullptr, 1095 .pNext = nullptr,
1100 .flags = 0, 1096 .flags = 0,
1101 .image = image_handle, 1097 .image = image_handle,
1102 .viewType = ImageViewType(type), 1098 .viewType = ImageViewType(type),
1103 .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, 1099 .format = vk_format,
1104 .components{ 1100 .components{
1105 .r = VK_COMPONENT_SWIZZLE_IDENTITY, 1101 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1106 .g = VK_COMPONENT_SWIZZLE_IDENTITY, 1102 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
1164 1160
1165Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, 1161Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1166 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { 1162 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1167 std::vector<VkAttachmentDescription> descriptions;
1168 std::vector<VkImageView> attachments; 1163 std::vector<VkImageView> attachments;
1169 RenderPassKey renderpass_key{}; 1164 RenderPassKey renderpass_key{};
1170 s32 num_layers = 1; 1165 s32 num_layers = 1;
@@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1175 renderpass_key.color_formats[index] = PixelFormat::Invalid; 1170 renderpass_key.color_formats[index] = PixelFormat::Invalid;
1176 continue; 1171 continue;
1177 } 1172 }
1178 descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
1179 attachments.push_back(color_buffer->RenderTarget()); 1173 attachments.push_back(color_buffer->RenderTarget());
1180 renderpass_key.color_formats[index] = color_buffer->format; 1174 renderpass_key.color_formats[index] = color_buffer->format;
1181 num_layers = std::max(num_layers, color_buffer->range.extent.layers); 1175 num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1185 ++num_images; 1179 ++num_images;
1186 } 1180 }
1187 const size_t num_colors = attachments.size(); 1181 const size_t num_colors = attachments.size();
1188 const VkAttachmentReference* depth_attachment =
1189 depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
1190 if (depth_buffer) { 1182 if (depth_buffer) {
1191 descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
1192 attachments.push_back(depth_buffer->RenderTarget()); 1183 attachments.push_back(depth_buffer->RenderTarget());
1193 renderpass_key.depth_format = depth_buffer->format; 1184 renderpass_key.depth_format = depth_buffer->format;
1194 num_layers = std::max(num_layers, depth_buffer->range.extent.layers); 1185 num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1201 } 1192 }
1202 renderpass_key.samples = samples; 1193 renderpass_key.samples = samples;
1203 1194
1204 const auto& device = runtime.device.GetLogical(); 1195 renderpass = runtime.render_pass_cache.Get(renderpass_key);
1205 const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); 1196
1206 if (is_new) {
1207 const VkSubpassDescription subpass{
1208 .flags = 0,
1209 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1210 .inputAttachmentCount = 0,
1211 .pInputAttachments = nullptr,
1212 .colorAttachmentCount = static_cast<u32>(num_colors),
1213 .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
1214 .pResolveAttachments = nullptr,
1215 .pDepthStencilAttachment = depth_attachment,
1216 .preserveAttachmentCount = 0,
1217 .pPreserveAttachments = nullptr,
1218 };
1219 cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
1220 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1221 .pNext = nullptr,
1222 .flags = 0,
1223 .attachmentCount = static_cast<u32>(descriptions.size()),
1224 .pAttachments = descriptions.data(),
1225 .subpassCount = 1,
1226 .pSubpasses = &subpass,
1227 .dependencyCount = 0,
1228 .pDependencies = nullptr,
1229 });
1230 }
1231 renderpass = *cache_pair->second;
1232 render_area = VkExtent2D{ 1197 render_area = VkExtent2D{
1233 .width = key.size.width, 1198 .width = key.size.width,
1234 .height = key.size.height, 1199 .height = key.size.height,
1235 }; 1200 };
1236 num_color_buffers = static_cast<u32>(num_colors); 1201 num_color_buffers = static_cast<u32>(num_colors);
1237 framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ 1202 framebuffer = runtime.device.GetLogical().CreateFramebuffer({
1238 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, 1203 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1239 .pNext = nullptr, 1204 .pNext = nullptr,
1240 .flags = 0, 1205 .flags = 0,
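StorageView() lazily builds one reinterpreting view per texture type, in separate signed and unsigned caches, and falls back to the plain per-type view for Typeless descriptors. A minimal sketch of how a descriptor-filling pass could request these views; AddImage() is an assumption about the update queue API (this excerpt only shows AddSampledImage), and the two Shader parameters would normally come from the shader's image descriptor.

// Sketch only: bind a storage image through the cached reinterpreting view.
void PushStorageImage(VKUpdateDescriptorQueue& update_descriptor_queue,
                      ImageView& image_view, Shader::TextureType type,
                      Shader::ImageFormat format) {
    const VkImageView handle = image_view.StorageView(type, format);
    update_descriptor_queue.AddImage(handle); // Assumed queue entry point.
}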
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98..0b73d55f8 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
7#include <compare> 7#include <compare>
8#include <span> 8#include <span>
9 9
10#include "shader_recompiler/shader_info.h"
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/texture_cache/texture_cache.h" 12#include "video_core/texture_cache/texture_cache.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
@@ -26,35 +27,10 @@ class Device;
26class Image; 27class Image;
27class ImageView; 28class ImageView;
28class Framebuffer; 29class Framebuffer;
30class RenderPassCache;
29class StagingBufferPool; 31class StagingBufferPool;
30class VKScheduler; 32class VKScheduler;
31 33
32struct RenderPassKey {
33 constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
34
35 std::array<PixelFormat, NUM_RT> color_formats;
36 PixelFormat depth_format;
37 VkSampleCountFlagBits samples;
38};
39
40} // namespace Vulkan
41
42namespace std {
43template <>
44struct hash<Vulkan::RenderPassKey> {
45 [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
46 size_t value = static_cast<size_t>(key.depth_format) << 48;
47 value ^= static_cast<size_t>(key.samples) << 52;
48 for (size_t i = 0; i < key.color_formats.size(); ++i) {
49 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
50 }
51 return value;
52 }
53};
54} // namespace std
55
56namespace Vulkan {
57
58struct TextureCacheRuntime { 34struct TextureCacheRuntime {
59 const Device& device; 35 const Device& device;
60 VKScheduler& scheduler; 36 VKScheduler& scheduler;
@@ -62,13 +38,13 @@ struct TextureCacheRuntime {
62 StagingBufferPool& staging_buffer_pool; 38 StagingBufferPool& staging_buffer_pool;
63 BlitImageHelper& blit_image_helper; 39 BlitImageHelper& blit_image_helper;
64 ASTCDecoderPass& astc_decoder_pass; 40 ASTCDecoderPass& astc_decoder_pass;
65 std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; 41 RenderPassCache& render_pass_cache;
66 42
67 void Finish(); 43 void Finish();
68 44
69 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 45 StagingBufferRef UploadStagingBuffer(size_t size);
70 46
71 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 47 StagingBufferRef DownloadStagingBuffer(size_t size);
72 48
73 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 49 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
74 const Region2D& dst_region, const Region2D& src_region, 50 const Region2D& dst_region, const Region2D& src_region,
@@ -79,7 +55,7 @@ struct TextureCacheRuntime {
79 55
80 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); 56 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
81 57
82 [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { 58 bool CanAccelerateImageUpload(Image&) const noexcept {
83 return false; 59 return false;
84 } 60 }
85 61
@@ -117,8 +93,6 @@ public:
117 void UploadMemory(const StagingBufferRef& map, 93 void UploadMemory(const StagingBufferRef& map,
118 std::span<const VideoCommon::BufferImageCopy> copies); 94 std::span<const VideoCommon::BufferImageCopy> copies);
119 95
120 void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
121
122 void DownloadMemory(const StagingBufferRef& map, 96 void DownloadMemory(const StagingBufferRef& map,
123 std::span<const VideoCommon::BufferImageCopy> copies); 97 std::span<const VideoCommon::BufferImageCopy> copies);
124 98
@@ -126,10 +100,6 @@ public:
126 return *image; 100 return *image;
127 } 101 }
128 102
129 [[nodiscard]] VkBuffer Buffer() const noexcept {
130 return *buffer;
131 }
132
133 [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { 103 [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
134 return aspect_mask; 104 return aspect_mask;
135 } 105 }
@@ -146,7 +116,6 @@ public:
146private: 116private:
147 VKScheduler* scheduler; 117 VKScheduler* scheduler;
148 vk::Image image; 118 vk::Image image;
149 vk::Buffer buffer;
150 MemoryCommit commit; 119 MemoryCommit commit;
151 vk::ImageView image_view; 120 vk::ImageView image_view;
152 std::vector<vk::ImageView> storage_image_views; 121 std::vector<vk::ImageView> storage_image_views;
@@ -157,18 +126,19 @@ private:
157class ImageView : public VideoCommon::ImageViewBase { 126class ImageView : public VideoCommon::ImageViewBase {
158public: 127public:
159 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); 128 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
129 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
130 const VideoCommon::ImageViewInfo&, GPUVAddr);
160 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); 131 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
161 132
162 [[nodiscard]] VkImageView DepthView(); 133 [[nodiscard]] VkImageView DepthView();
163 134
164 [[nodiscard]] VkImageView StencilView(); 135 [[nodiscard]] VkImageView StencilView();
165 136
166 [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { 137 [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
167 return *image_views[static_cast<size_t>(query_type)]; 138 Shader::ImageFormat image_format);
168 }
169 139
170 [[nodiscard]] VkBufferView BufferView() const noexcept { 140 [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
171 return *buffer_view; 141 return *image_views[static_cast<size_t>(texture_type)];
172 } 142 }
173 143
174 [[nodiscard]] VkImage ImageHandle() const noexcept { 144 [[nodiscard]] VkImage ImageHandle() const noexcept {
@@ -179,26 +149,36 @@ public:
179 return render_target; 149 return render_target;
180 } 150 }
181 151
182 [[nodiscard]] PixelFormat ImageFormat() const noexcept {
183 return image_format;
184 }
185
186 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { 152 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
187 return samples; 153 return samples;
188 } 154 }
189 155
156 [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
157 return gpu_addr;
158 }
159
160 [[nodiscard]] u32 BufferSize() const noexcept {
161 return buffer_size;
162 }
163
190private: 164private:
191 [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); 165 struct StorageViews {
166 std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
167 std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
168 };
169
170 [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
192 171
193 const Device* device = nullptr; 172 const Device* device = nullptr;
194 std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; 173 std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
174 std::unique_ptr<StorageViews> storage_views;
195 vk::ImageView depth_view; 175 vk::ImageView depth_view;
196 vk::ImageView stencil_view; 176 vk::ImageView stencil_view;
197 vk::BufferView buffer_view;
198 VkImage image_handle = VK_NULL_HANDLE; 177 VkImage image_handle = VK_NULL_HANDLE;
199 VkImageView render_target = VK_NULL_HANDLE; 178 VkImageView render_target = VK_NULL_HANDLE;
200 PixelFormat image_format = PixelFormat::Invalid;
201 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; 179 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
180 GPUVAddr gpu_addr = 0;
181 u32 buffer_size = 0;
202}; 182};
203 183
204class ImageAlloc : public VideoCommon::ImageAllocBase {}; 184class ImageAlloc : public VideoCommon::ImageAllocBase {};
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index dc45fdcb1..0df3a7fe9 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -15,7 +15,9 @@
15namespace Vulkan { 15namespace Vulkan {
16 16
17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) 17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
18 : device{device_}, scheduler{scheduler_} {} 18 : device{device_}, scheduler{scheduler_} {
19 payload_cursor = payload.data();
20}
19 21
20VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; 22VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
21 23
@@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() {
36 upload_start = payload_cursor; 38 upload_start = payload_cursor;
37} 39}
38 40
39void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
40 VkDescriptorSet set) {
41 const void* const data = upload_start;
42 const vk::Device* const logical = &device.GetLogical();
43 scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
44 logical->UpdateDescriptorSet(set, update_template, data);
45 });
46}
47
48} // namespace Vulkan 41} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d35e77c44..d7de4c490 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
39 39
40 void Acquire(); 40 void Acquire();
41 41
42 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 42 const DescriptorUpdateEntry* UpdateData() const noexcept {
43 return upload_start;
44 }
43 45
44 void AddSampledImage(VkImageView image_view, VkSampler sampler) { 46 void AddSampledImage(VkImageView image_view, VkSampler sampler) {
45 *(payload_cursor++) = VkDescriptorImageInfo{ 47 *(payload_cursor++) = VkDescriptorImageInfo{
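With Send() gone, the queue only assembles the payload and exposes it through UpdateData(); whoever owns the descriptor update template records the update itself. A minimal sketch of such a consumer, reusing the same UpdateDescriptorSet() call the removed Send() made; the surrounding pipeline code is not part of this excerpt.

// Sketch only: record the descriptor update from the exposed payload pointer.
void RecordDescriptorUpdate(VKScheduler& scheduler, const vk::Device& logical,
                            const VKUpdateDescriptorQueue& queue,
                            VkDescriptorUpdateTemplateKHR update_template,
                            VkDescriptorSet descriptor_set) {
    const DescriptorUpdateEntry* const data = queue.UpdateData();
    scheduler.Record([&logical, data, update_template, descriptor_set](vk::CommandBuffer) {
        logical.UpdateDescriptorSet(descriptor_set, update_template, data);
    });
}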
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
deleted file mode 100644
index db11144c7..000000000
--- a/src/video_core/shader/ast.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <string_view>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "video_core/shader/ast.h"
13#include "video_core/shader/expr.h"
14
15namespace VideoCommon::Shader {
16
17ASTZipper::ASTZipper() = default;
18
19void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
20 ASSERT(new_first->manager == nullptr);
21 first = new_first;
22 last = new_first;
23
24 ASTNode current = first;
25 while (current) {
26 current->manager = this;
27 current->parent = parent;
28 last = current;
29 current = current->next;
30 }
31}
32
33void ASTZipper::PushBack(const ASTNode new_node) {
34 ASSERT(new_node->manager == nullptr);
35 new_node->previous = last;
36 if (last) {
37 last->next = new_node;
38 }
39 new_node->next.reset();
40 last = new_node;
41 if (!first) {
42 first = new_node;
43 }
44 new_node->manager = this;
45}
46
47void ASTZipper::PushFront(const ASTNode new_node) {
48 ASSERT(new_node->manager == nullptr);
49 new_node->previous.reset();
50 new_node->next = first;
51 if (first) {
52 first->previous = new_node;
53 }
54 if (last == first) {
55 last = new_node;
56 }
57 first = new_node;
58 new_node->manager = this;
59}
60
61void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
62 ASSERT(new_node->manager == nullptr);
63 if (!at_node) {
64 PushFront(new_node);
65 return;
66 }
67 const ASTNode next = at_node->next;
68 if (next) {
69 next->previous = new_node;
70 }
71 new_node->previous = at_node;
72 if (at_node == last) {
73 last = new_node;
74 }
75 new_node->next = next;
76 at_node->next = new_node;
77 new_node->manager = this;
78}
79
80void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
81 ASSERT(new_node->manager == nullptr);
82 if (!at_node) {
83 PushBack(new_node);
84 return;
85 }
86 const ASTNode previous = at_node->previous;
87 if (previous) {
88 previous->next = new_node;
89 }
90 new_node->next = at_node;
91 if (at_node == first) {
92 first = new_node;
93 }
94 new_node->previous = previous;
95 at_node->previous = new_node;
96 new_node->manager = this;
97}
98
99void ASTZipper::DetachTail(ASTNode node) {
100 ASSERT(node->manager == this);
101 if (node == first) {
102 first.reset();
103 last.reset();
104 return;
105 }
106
107 last = node->previous;
108 last->next.reset();
109 node->previous.reset();
110
111 ASTNode current = std::move(node);
112 while (current) {
113 current->manager = nullptr;
114 current->parent.reset();
115 current = current->next;
116 }
117}
118
119void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
120 ASSERT(start->manager == this && end->manager == this);
121 if (start == end) {
122 DetachSingle(start);
123 return;
124 }
125 const ASTNode prev = start->previous;
126 const ASTNode post = end->next;
127 if (!prev) {
128 first = post;
129 } else {
130 prev->next = post;
131 }
132 if (!post) {
133 last = prev;
134 } else {
135 post->previous = prev;
136 }
137 start->previous.reset();
138 end->next.reset();
139 ASTNode current = start;
140 bool found = false;
141 while (current) {
142 current->manager = nullptr;
143 current->parent.reset();
144 found |= current == end;
145 current = current->next;
146 }
147 ASSERT(found);
148}
149
150void ASTZipper::DetachSingle(const ASTNode node) {
151 ASSERT(node->manager == this);
152 const ASTNode prev = node->previous;
153 const ASTNode post = node->next;
154 node->previous.reset();
155 node->next.reset();
156 if (!prev) {
157 first = post;
158 } else {
159 prev->next = post;
160 }
161 if (!post) {
162 last = prev;
163 } else {
164 post->previous = prev;
165 }
166
167 node->manager = nullptr;
168 node->parent.reset();
169}
170
171void ASTZipper::Remove(const ASTNode node) {
172 ASSERT(node->manager == this);
173 const ASTNode next = node->next;
174 const ASTNode previous = node->previous;
175 if (previous) {
176 previous->next = next;
177 }
178 if (next) {
179 next->previous = previous;
180 }
181 node->parent.reset();
182 node->manager = nullptr;
183 if (node == last) {
184 last = previous;
185 }
186 if (node == first) {
187 first = next;
188 }
189}
190
191class ExprPrinter final {
192public:
193 void operator()(const ExprAnd& expr) {
194 inner += "( ";
195 std::visit(*this, *expr.operand1);
196 inner += " && ";
197 std::visit(*this, *expr.operand2);
198 inner += ')';
199 }
200
201 void operator()(const ExprOr& expr) {
202 inner += "( ";
203 std::visit(*this, *expr.operand1);
204 inner += " || ";
205 std::visit(*this, *expr.operand2);
206 inner += ')';
207 }
208
209 void operator()(const ExprNot& expr) {
210 inner += "!";
211 std::visit(*this, *expr.operand1);
212 }
213
214 void operator()(const ExprPredicate& expr) {
215 inner += fmt::format("P{}", expr.predicate);
216 }
217
218 void operator()(const ExprCondCode& expr) {
219 inner += fmt::format("CC{}", expr.cc);
220 }
221
222 void operator()(const ExprVar& expr) {
223 inner += fmt::format("V{}", expr.var_index);
224 }
225
226 void operator()(const ExprBoolean& expr) {
227 inner += expr.value ? "true" : "false";
228 }
229
230 void operator()(const ExprGprEqual& expr) {
231 inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
232 }
233
234 const std::string& GetResult() const {
235 return inner;
236 }
237
238private:
239 std::string inner;
240};
241
242class ASTPrinter {
243public:
244 void operator()(const ASTProgram& ast) {
245 scope++;
246 inner += "program {\n";
247 ASTNode current = ast.nodes.GetFirst();
248 while (current) {
249 Visit(current);
250 current = current->GetNext();
251 }
252 inner += "}\n";
253 scope--;
254 }
255
256 void operator()(const ASTIfThen& ast) {
257 ExprPrinter expr_parser{};
258 std::visit(expr_parser, *ast.condition);
259 inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
260 scope++;
261 ASTNode current = ast.nodes.GetFirst();
262 while (current) {
263 Visit(current);
264 current = current->GetNext();
265 }
266 scope--;
267 inner += fmt::format("{}}}\n", Indent());
268 }
269
270 void operator()(const ASTIfElse& ast) {
271 inner += Indent();
272 inner += "else {\n";
273
274 scope++;
275 ASTNode current = ast.nodes.GetFirst();
276 while (current) {
277 Visit(current);
278 current = current->GetNext();
279 }
280 scope--;
281
282 inner += Indent();
283 inner += "}\n";
284 }
285
286 void operator()(const ASTBlockEncoded& ast) {
287 inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
288 }
289
290 void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
291 inner += Indent();
292 inner += "Block;\n";
293 }
294
295 void operator()(const ASTVarSet& ast) {
296 ExprPrinter expr_parser{};
297 std::visit(expr_parser, *ast.condition);
298 inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
299 }
300
301 void operator()(const ASTLabel& ast) {
302 inner += fmt::format("Label_{}:\n", ast.index);
303 }
304
305 void operator()(const ASTGoto& ast) {
306 ExprPrinter expr_parser{};
307 std::visit(expr_parser, *ast.condition);
308 inner +=
309 fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
310 }
311
312 void operator()(const ASTDoWhile& ast) {
313 ExprPrinter expr_parser{};
314 std::visit(expr_parser, *ast.condition);
315 inner += fmt::format("{}do {{\n", Indent());
316 scope++;
317 ASTNode current = ast.nodes.GetFirst();
318 while (current) {
319 Visit(current);
320 current = current->GetNext();
321 }
322 scope--;
323 inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
324 }
325
326 void operator()(const ASTReturn& ast) {
327 ExprPrinter expr_parser{};
328 std::visit(expr_parser, *ast.condition);
329 inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
330 ast.kills ? "discard" : "exit");
331 }
332
333 void operator()(const ASTBreak& ast) {
334 ExprPrinter expr_parser{};
335 std::visit(expr_parser, *ast.condition);
336 inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
337 }
338
339 void Visit(const ASTNode& node) {
340 std::visit(*this, *node->GetInnerData());
341 }
342
343 const std::string& GetResult() const {
344 return inner;
345 }
346
347private:
348 std::string_view Indent() {
349 if (space_segment_scope == scope) {
350 return space_segment;
351 }
352
353 // Ensure that we don't exceed our view.
354 ASSERT(scope * 2 < spaces.size());
355
356 space_segment = spaces.substr(0, scope * 2);
357 space_segment_scope = scope;
358 return space_segment;
359 }
360
361 std::string inner{};
362 std::string_view space_segment;
363
364 u32 scope{};
365 u32 space_segment_scope{};
366
367 static constexpr std::string_view spaces{" "};
368};
369
370std::string ASTManager::Print() const {
371 ASTPrinter printer{};
372 printer.Visit(main_node);
373 return printer.GetResult();
374}
375
376ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
377 : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
378
379ASTManager::~ASTManager() {
380 Clear();
381}
382
383void ASTManager::Init() {
384 main_node = ASTBase::Make<ASTProgram>(ASTNode{});
385 program = std::get_if<ASTProgram>(main_node->GetInnerData());
386 false_condition = MakeExpr<ExprBoolean>(false);
387}
388
389void ASTManager::DeclareLabel(u32 address) {
390 const auto pair = labels_map.emplace(address, labels_count);
391 if (pair.second) {
392 labels_count++;
393 labels.resize(labels_count);
394 }
395}
396
397void ASTManager::InsertLabel(u32 address) {
398 const u32 index = labels_map[address];
399 const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index);
400 labels[index] = label;
401 program->nodes.PushBack(label);
402}
403
404void ASTManager::InsertGoto(Expr condition, u32 address) {
405 const u32 index = labels_map[address];
406 const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index);
407 gotos.push_back(goto_node);
408 program->nodes.PushBack(goto_node);
409}
410
411void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
412 ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
413 program->nodes.PushBack(std::move(block));
414}
415
416void ASTManager::InsertReturn(Expr condition, bool kills) {
417 ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
418 program->nodes.PushBack(std::move(node));
419}
420
421// The decompile algorithm is based on
422// "Taming control flow: A structured approach to eliminating goto statements"
423// by A. M. Erosa and L. J. Hendren, 1994. In general, the idea is to get gotos to be
424// on the same structured level as the label they jump to. This is done
425// through outward/inward movements and lifting. Once they are at the same
426// level, you can enclose them in an "if" structure or a "do-while" structure.
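// Illustrative sketch (using the printer's notation, not taken from the paper):
//   Label_0: ...; (p0) -> goto Label_0;   becomes   do { ... } while (p0);
//   (p0) -> goto Label_1; ...; Label_1:   becomes   if (!p0) { ... }
// once EncloseDoWhile() or EncloseIfThen() runs on gotos at the label's level.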
427void ASTManager::Decompile() {
428 auto it = gotos.begin();
429 while (it != gotos.end()) {
430 const ASTNode goto_node = *it;
431 const auto label_index = goto_node->GetGotoLabel();
432 if (!label_index) {
433 return;
434 }
435 const ASTNode label = labels[*label_index];
436 if (!full_decompile) {
437 // We only decompile backward jumps
438 if (!IsBackwardsJump(goto_node, label)) {
439 it++;
440 continue;
441 }
442 }
443 if (IndirectlyRelated(goto_node, label)) {
444 while (!DirectlyRelated(goto_node, label)) {
445 MoveOutward(goto_node);
446 }
447 }
448 if (DirectlyRelated(goto_node, label)) {
449 u32 goto_level = goto_node->GetLevel();
450 const u32 label_level = label->GetLevel();
451 while (label_level < goto_level) {
452 MoveOutward(goto_node);
453 goto_level--;
454 }
455 // TODO(Blinkhawk): Implement Lifting and Inward Movements
456 }
457 if (label->GetParent() == goto_node->GetParent()) {
458 bool is_loop = false;
459 ASTNode current = goto_node->GetPrevious();
460 while (current) {
461 if (current == label) {
462 is_loop = true;
463 break;
464 }
465 current = current->GetPrevious();
466 }
467
468 if (is_loop) {
469 EncloseDoWhile(goto_node, label);
470 } else {
471 EncloseIfThen(goto_node, label);
472 }
473 it = gotos.erase(it);
474 continue;
475 }
476 it++;
477 }
478 if (full_decompile) {
479 for (const ASTNode& label : labels) {
480 auto& manager = label->GetManager();
481 manager.Remove(label);
482 }
483 labels.clear();
484 } else {
485 auto label_it = labels.begin();
486 while (label_it != labels.end()) {
487 bool can_remove = true;
488 ASTNode label = *label_it;
489 for (const ASTNode& goto_node : gotos) {
490 const auto label_index = goto_node->GetGotoLabel();
491 if (!label_index) {
492 return;
493 }
494 ASTNode& glabel = labels[*label_index];
495 if (glabel == label) {
496 can_remove = false;
497 break;
498 }
499 }
500 if (can_remove) {
501 label->MarkLabelUnused();
502 }
503 }
504 }
505}
506
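// A jump is backwards when, after raising both nodes to a common parent, the label
// appears before the goto among its siblings.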
507bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
508 u32 goto_level = goto_node->GetLevel();
509 u32 label_level = label_node->GetLevel();
510 while (goto_level > label_level) {
511 goto_level--;
512 goto_node = goto_node->GetParent();
513 }
514 while (label_level > goto_level) {
515 label_level--;
516 label_node = label_node->GetParent();
517 }
518 while (goto_node->GetParent() != label_node->GetParent()) {
519 goto_node = goto_node->GetParent();
520 label_node = label_node->GetParent();
521 }
522 ASTNode current = goto_node->GetPrevious();
523 while (current) {
524 if (current == label_node) {
525 return true;
526 }
527 current = current->GetPrevious();
528 }
529 return false;
530}
531
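// Nodes are directly related when raising the deeper one to the other's level makes
// them siblings; indirectly related covers nodes that are neither siblings nor
// directly related.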
532bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
533 return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
534}
535
536bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
537 if (first->GetParent() == second->GetParent()) {
538 return false;
539 }
540 const u32 first_level = first->GetLevel();
541 const u32 second_level = second->GetLevel();
542 u32 min_level;
543 u32 max_level;
544 ASTNode max;
545 ASTNode min;
546 if (first_level > second_level) {
547 min_level = second_level;
548 min = second;
549 max_level = first_level;
550 max = first;
551 } else {
552 min_level = first_level;
553 min = first;
554 max_level = second_level;
555 max = second;
556 }
557
558 while (max_level > min_level) {
559 max_level--;
560 max = max->GetParent();
561 }
562
563 return min->GetParent() == max->GetParent();
564}
565
566void ASTManager::ShowCurrentState(std::string_view state) const {
567 LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
568 SanityCheck();
569}
570
571void ASTManager::SanityCheck() const {
572 for (const auto& label : labels) {
573 if (!label->GetParent()) {
574 LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
575 }
576 }
577}
578
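// Wraps the segment between a label and its backward goto in a do-while conditioned on
// the goto's condition; a goto immediately following its label is simply removed.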
579void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
580 ASTZipper& zipper = goto_node->GetManager();
581 const ASTNode loop_start = label->GetNext();
582 if (loop_start == goto_node) {
583 zipper.Remove(goto_node);
584 return;
585 }
586 const ASTNode parent = label->GetParent();
587 const Expr condition = goto_node->GetGotoCondition();
588 zipper.DetachSegment(loop_start, goto_node);
589 const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
590 ASTZipper* sub_zipper = do_while_node->GetSubNodes();
591 sub_zipper->Init(loop_start, do_while_node);
592 zipper.InsertAfter(do_while_node, label);
593 sub_zipper->Remove(goto_node);
594}
595
596void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
597 ASTZipper& zipper = goto_node->GetManager();
598 const ASTNode if_end = label->GetPrevious();
599 if (if_end == goto_node) {
600 zipper.Remove(goto_node);
601 return;
602 }
603 const ASTNode prev = goto_node->GetPrevious();
604 const Expr condition = goto_node->GetGotoCondition();
605 bool do_else = false;
606 if (!disable_else_derivation && prev->IsIfThen()) {
607 const Expr if_condition = prev->GetIfCondition();
608 do_else = ExprAreEqual(if_condition, condition);
609 }
610 const ASTNode parent = label->GetParent();
611 zipper.DetachSegment(goto_node, if_end);
612 ASTNode if_node;
613 if (do_else) {
614 if_node = ASTBase::Make<ASTIfElse>(parent);
615 } else {
616 Expr neg_condition = MakeExprNot(condition);
617 if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
618 }
619 ASTZipper* sub_zipper = if_node->GetSubNodes();
620 sub_zipper->Init(goto_node, if_node);
621 zipper.InsertAfter(if_node, prev);
622 sub_zipper->Remove(goto_node);
623}
624
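// Moves a goto one nesting level outward (the paper's "outward movement"): inside a
// loop the condition is captured in a fresh variable and a conditional break is left
// behind; inside an if/else the remaining tail is re-nested under the negated variable.
// The goto itself is reinserted after its former parent (or after a following else block).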
625void ASTManager::MoveOutward(ASTNode goto_node) {
626 ASTZipper& zipper = goto_node->GetManager();
627 const ASTNode parent = goto_node->GetParent();
628 ASTZipper& zipper2 = parent->GetManager();
629 const ASTNode grandpa = parent->GetParent();
630 const bool is_loop = parent->IsLoop();
631 const bool is_else = parent->IsIfElse();
632 const bool is_if = parent->IsIfThen();
633
634 const ASTNode prev = goto_node->GetPrevious();
635 const ASTNode post = goto_node->GetNext();
636
637 const Expr condition = goto_node->GetGotoCondition();
638 zipper.DetachSingle(goto_node);
639 if (is_loop) {
640 const u32 var_index = NewVariable();
641 const Expr var_condition = MakeExpr<ExprVar>(var_index);
642 const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
643 const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
644 zipper2.InsertBefore(var_node_init, parent);
645 zipper.InsertAfter(var_node, prev);
646 goto_node->SetGotoCondition(var_condition);
647 const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
648 zipper.InsertAfter(break_node, var_node);
649 } else if (is_if || is_else) {
650 const u32 var_index = NewVariable();
651 const Expr var_condition = MakeExpr<ExprVar>(var_index);
652 const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
653 const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
654 if (is_if) {
655 zipper2.InsertBefore(var_node_init, parent);
656 } else {
657 zipper2.InsertBefore(var_node_init, parent->GetPrevious());
658 }
659 zipper.InsertAfter(var_node, prev);
660 goto_node->SetGotoCondition(var_condition);
661 if (post) {
662 zipper.DetachTail(post);
663 const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
664 ASTZipper* sub_zipper = if_node->GetSubNodes();
665 sub_zipper->Init(post, if_node);
666 zipper.InsertAfter(if_node, var_node);
667 }
668 } else {
669 UNREACHABLE();
670 }
671 const ASTNode next = parent->GetNext();
672 if (is_if && next && next->IsIfElse()) {
673 zipper2.InsertAfter(goto_node, next);
674 goto_node->SetParent(grandpa);
675 return;
676 }
677 zipper2.InsertAfter(goto_node, parent);
678 goto_node->SetParent(grandpa);
679}
680
681class ASTClearer {
682public:
683 ASTClearer() = default;
684
685 void operator()(const ASTProgram& ast) {
686 ASTNode current = ast.nodes.GetFirst();
687 while (current) {
688 Visit(current);
689 current = current->GetNext();
690 }
691 }
692
693 void operator()(const ASTIfThen& ast) {
694 ASTNode current = ast.nodes.GetFirst();
695 while (current) {
696 Visit(current);
697 current = current->GetNext();
698 }
699 }
700
701 void operator()(const ASTIfElse& ast) {
702 ASTNode current = ast.nodes.GetFirst();
703 while (current) {
704 Visit(current);
705 current = current->GetNext();
706 }
707 }
708
709 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
710
711 void operator()(ASTBlockDecoded& ast) {
712 ast.nodes.clear();
713 }
714
715 void operator()([[maybe_unused]] const ASTVarSet& ast) {}
716
717 void operator()([[maybe_unused]] const ASTLabel& ast) {}
718
719 void operator()([[maybe_unused]] const ASTGoto& ast) {}
720
721 void operator()(const ASTDoWhile& ast) {
722 ASTNode current = ast.nodes.GetFirst();
723 while (current) {
724 Visit(current);
725 current = current->GetNext();
726 }
727 }
728
729 void operator()([[maybe_unused]] const ASTReturn& ast) {}
730
731 void operator()([[maybe_unused]] const ASTBreak& ast) {}
732
733 void Visit(const ASTNode& node) {
734 std::visit(*this, *node->GetInnerData());
735 node->Clear();
736 }
737};
738
739void ASTManager::Clear() {
740 if (!main_node) {
741 return;
742 }
743 ASTClearer clearer{};
744 clearer.Visit(main_node);
745 main_node.reset();
746 program = nullptr;
747 labels_map.clear();
748 labels.clear();
749 gotos.clear();
750}
751
752} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
deleted file mode 100644
index dc49b369e..000000000
--- a/src/video_core/shader/ast.h
+++ /dev/null
@@ -1,398 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <list>
9#include <memory>
10#include <optional>
11#include <string>
12#include <unordered_map>
13#include <vector>
14
15#include "video_core/shader/expr.h"
16#include "video_core/shader/node.h"
17
18namespace VideoCommon::Shader {
19
20class ASTBase;
21class ASTBlockDecoded;
22class ASTBlockEncoded;
23class ASTBreak;
24class ASTDoWhile;
25class ASTGoto;
26class ASTIfElse;
27class ASTIfThen;
28class ASTLabel;
29class ASTProgram;
30class ASTReturn;
31class ASTVarSet;
32
33using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
34 ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
35
36using ASTNode = std::shared_ptr<ASTBase>;
37
38enum class ASTZipperType : u32 {
39 Program,
40 IfThen,
41 IfElse,
42 Loop,
43};
44
45class ASTZipper final {
46public:
47 explicit ASTZipper();
48
49 void Init(ASTNode first, ASTNode parent);
50
51 ASTNode GetFirst() const {
52 return first;
53 }
54
55 ASTNode GetLast() const {
56 return last;
57 }
58
59 void PushBack(ASTNode new_node);
60 void PushFront(ASTNode new_node);
61 void InsertAfter(ASTNode new_node, ASTNode at_node);
62 void InsertBefore(ASTNode new_node, ASTNode at_node);
63 void DetachTail(ASTNode node);
64 void DetachSingle(ASTNode node);
65 void DetachSegment(ASTNode start, ASTNode end);
66 void Remove(ASTNode node);
67
68 ASTNode first;
69 ASTNode last;
70};
71
72class ASTProgram {
73public:
74 ASTZipper nodes{};
75};
76
77class ASTIfThen {
78public:
79 explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
80 Expr condition;
81 ASTZipper nodes{};
82};
83
84class ASTIfElse {
85public:
86 ASTZipper nodes{};
87};
88
89class ASTBlockEncoded {
90public:
91 explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
92 u32 start;
93 u32 end;
94};
95
96class ASTBlockDecoded {
97public:
98 explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
99 NodeBlock nodes;
100};
101
102class ASTVarSet {
103public:
104 explicit ASTVarSet(u32 index_, Expr condition_)
105 : index{index_}, condition{std::move(condition_)} {}
106
107 u32 index;
108 Expr condition;
109};
110
111class ASTLabel {
112public:
113 explicit ASTLabel(u32 index_) : index{index_} {}
114 u32 index;
115 bool unused{};
116};
117
118class ASTGoto {
119public:
120 explicit ASTGoto(Expr condition_, u32 label_)
121 : condition{std::move(condition_)}, label{label_} {}
122
123 Expr condition;
124 u32 label;
125};
126
127class ASTDoWhile {
128public:
129 explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
130 Expr condition;
131 ASTZipper nodes{};
132};
133
134class ASTReturn {
135public:
136 explicit ASTReturn(Expr condition_, bool kills_)
137 : condition{std::move(condition_)}, kills{kills_} {}
138
139 Expr condition;
140 bool kills;
141};
142
143class ASTBreak {
144public:
145 explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
146 Expr condition;
147};
148
149class ASTBase {
150public:
151 explicit ASTBase(ASTNode parent_, ASTData data_)
152 : data{std::move(data_)}, parent{std::move(parent_)} {}
153
154 template <class U, class... Args>
155 static ASTNode Make(ASTNode parent, Args&&... args) {
156 return std::make_shared<ASTBase>(std::move(parent),
157 ASTData(U(std::forward<Args>(args)...)));
158 }
159
160 void SetParent(ASTNode new_parent) {
161 parent = std::move(new_parent);
162 }
163
164 ASTNode& GetParent() {
165 return parent;
166 }
167
168 const ASTNode& GetParent() const {
169 return parent;
170 }
171
172 u32 GetLevel() const {
173 u32 level = 0;
174 auto next_parent = parent;
175 while (next_parent) {
176 next_parent = next_parent->GetParent();
177 level++;
178 }
179 return level;
180 }
181
182 ASTData* GetInnerData() {
183 return &data;
184 }
185
186 const ASTData* GetInnerData() const {
187 return &data;
188 }
189
190 ASTNode GetNext() const {
191 return next;
192 }
193
194 ASTNode GetPrevious() const {
195 return previous;
196 }
197
198 ASTZipper& GetManager() {
199 return *manager;
200 }
201
202 const ASTZipper& GetManager() const {
203 return *manager;
204 }
205
206 std::optional<u32> GetGotoLabel() const {
207 if (const auto* inner = std::get_if<ASTGoto>(&data)) {
208 return {inner->label};
209 }
210 return std::nullopt;
211 }
212
213 Expr GetGotoCondition() const {
214 if (const auto* inner = std::get_if<ASTGoto>(&data)) {
215 return inner->condition;
216 }
217 return nullptr;
218 }
219
220 void MarkLabelUnused() {
221 if (auto* inner = std::get_if<ASTLabel>(&data)) {
222 inner->unused = true;
223 }
224 }
225
226 bool IsLabelUnused() const {
227 if (const auto* inner = std::get_if<ASTLabel>(&data)) {
228 return inner->unused;
229 }
230 return true;
231 }
232
233 std::optional<u32> GetLabelIndex() const {
234 if (const auto* inner = std::get_if<ASTLabel>(&data)) {
235 return {inner->index};
236 }
237 return std::nullopt;
238 }
239
240 Expr GetIfCondition() const {
241 if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
242 return inner->condition;
243 }
244 return nullptr;
245 }
246
247 void SetGotoCondition(Expr new_condition) {
248 if (auto* inner = std::get_if<ASTGoto>(&data)) {
249 inner->condition = std::move(new_condition);
250 }
251 }
252
253 bool IsIfThen() const {
254 return std::holds_alternative<ASTIfThen>(data);
255 }
256
257 bool IsIfElse() const {
258 return std::holds_alternative<ASTIfElse>(data);
259 }
260
261 bool IsBlockEncoded() const {
262 return std::holds_alternative<ASTBlockEncoded>(data);
263 }
264
265 void TransformBlockEncoded(NodeBlock&& nodes) {
266 data = ASTBlockDecoded(std::move(nodes));
267 }
268
269 bool IsLoop() const {
270 return std::holds_alternative<ASTDoWhile>(data);
271 }
272
273 ASTZipper* GetSubNodes() {
274 if (std::holds_alternative<ASTProgram>(data)) {
275 return &std::get_if<ASTProgram>(&data)->nodes;
276 }
277 if (std::holds_alternative<ASTIfThen>(data)) {
278 return &std::get_if<ASTIfThen>(&data)->nodes;
279 }
280 if (std::holds_alternative<ASTIfElse>(data)) {
281 return &std::get_if<ASTIfElse>(&data)->nodes;
282 }
283 if (std::holds_alternative<ASTDoWhile>(data)) {
284 return &std::get_if<ASTDoWhile>(&data)->nodes;
285 }
286 return nullptr;
287 }
288
289 void Clear() {
290 next.reset();
291 previous.reset();
292 parent.reset();
293 manager = nullptr;
294 }
295
296private:
297 friend class ASTZipper;
298
299 ASTData data;
300 ASTNode parent;
301 ASTNode next;
302 ASTNode previous;
303 ASTZipper* manager{};
304};
305
306class ASTManager final {
307public:
308 explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
309 ~ASTManager();
310
311 ASTManager(const ASTManager& o) = delete;
312 ASTManager& operator=(const ASTManager& other) = delete;
313
314 ASTManager(ASTManager&& other) noexcept = default;
315 ASTManager& operator=(ASTManager&& other) noexcept = default;
316
317 void Init();
318
319 void DeclareLabel(u32 address);
320
321 void InsertLabel(u32 address);
322
323 void InsertGoto(Expr condition, u32 address);
324
325 void InsertBlock(u32 start_address, u32 end_address);
326
327 void InsertReturn(Expr condition, bool kills);
328
329 std::string Print() const;
330
331 void Decompile();
332
333 void ShowCurrentState(std::string_view state) const;
334
335 void SanityCheck() const;
336
337 void Clear();
338
339 bool IsFullyDecompiled() const {
340 if (full_decompile) {
341 return gotos.empty();
342 }
343
344 for (ASTNode goto_node : gotos) {
345 auto label_index = goto_node->GetGotoLabel();
346 if (!label_index) {
347 return false;
348 }
349 ASTNode glabel = labels[*label_index];
350 if (IsBackwardsJump(goto_node, glabel)) {
351 return false;
352 }
353 }
354 return true;
355 }
356
357 ASTNode GetProgram() const {
358 return main_node;
359 }
360
361 u32 GetVariables() const {
362 return variables;
363 }
364
365 const std::vector<ASTNode>& GetLabels() const {
366 return labels;
367 }
368
369private:
370 bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
371
372 bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
373
374 bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
375
376 void EncloseDoWhile(ASTNode goto_node, ASTNode label);
377
378 void EncloseIfThen(ASTNode goto_node, ASTNode label);
379
380 void MoveOutward(ASTNode goto_node);
381
382 u32 NewVariable() {
383 return variables++;
384 }
385
386 bool full_decompile{};
387 bool disable_else_derivation{};
388 std::unordered_map<u32, u32> labels_map{};
389 u32 labels_count{};
390 std::vector<ASTNode> labels{};
391 std::list<ASTNode> gotos{};
392 u32 variables{};
393 ASTProgram* program{};
394 ASTNode main_node{};
395 Expr false_condition{};
396};
397
398} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
deleted file mode 100644
index 02adcf9c7..000000000
--- a/src/video_core/shader/async_shaders.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <condition_variable>
6#include <mutex>
7#include <thread>
8#include <vector>
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_base.h"
11#include "video_core/renderer_opengl/gl_shader_cache.h"
12#include "video_core/shader/async_shaders.h"
13
14namespace VideoCommon::Shader {
15
16AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
17
18AsyncShaders::~AsyncShaders() {
19 KillWorkers();
20}
21
22void AsyncShaders::AllocateWorkers() {
23 // Use at least one thread
24 u32 num_workers = 1;
25
26 // Deduce how many more threads we can use
27 const u32 thread_count = std::thread::hardware_concurrency();
28 if (thread_count >= 8) {
29 // Increase async workers by 1 for every 2 threads >= 8
30 num_workers += 1 + (thread_count - 8) / 2;
31 }
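// Worked example (illustrative): 12 hardware threads give 1 + 1 + (12 - 8) / 2 = 4
// workers; fewer than 8 threads keep the single default worker.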
32
33 // If we already have the desired number of workers, nothing to do
34 if (num_workers == worker_threads.size()) {
35 return;
36 }
37
38 // If workers already exist, clear them
39 if (!worker_threads.empty()) {
40 FreeWorkers();
41 }
42
43 // Create workers
44 for (std::size_t i = 0; i < num_workers; i++) {
45 context_list.push_back(emu_window.CreateSharedContext());
46 worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
47 context_list[i].get());
48 }
49}
50
51void AsyncShaders::FreeWorkers() {
52 // Mark all threads to quit
53 is_thread_exiting.store(true);
54 cv.notify_all();
55 for (auto& thread : worker_threads) {
56 thread.join();
57 }
58 // Clear our shared contexts
59 context_list.clear();
60
61 // Clear our worker threads
62 worker_threads.clear();
63}
64
65void AsyncShaders::KillWorkers() {
66 is_thread_exiting.store(true);
67 cv.notify_all();
68 for (auto& thread : worker_threads) {
69 thread.detach();
70 }
71 // Clear our shared contexts
72 context_list.clear();
73
74 // Clear our worker threads
75 worker_threads.clear();
76}
77
78bool AsyncShaders::HasWorkQueued() const {
79 return !pending_queue.empty();
80}
81
82bool AsyncShaders::HasCompletedWork() const {
83 std::shared_lock lock{completed_mutex};
84 return !finished_work.empty();
85}
86
87bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
88 const auto& regs = gpu.Maxwell3D().regs;
89
90 // If depth is in use, we can assume the game is not rendering anything that will
91 // only be used once.
92 if (regs.zeta_enable) {
93 return true;
94 }
95
96 // If games are using a small index count, we can assume these are full-screen quads. Usually
97 // these shaders are only used once for building textures, so we assume they should not be
98 // built async.
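// (Six indices are two triangles, i.e. a single quad.)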
99 if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
100 return false;
101 }
102
103 return true;
104}
105
106std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
107 std::vector<Result> results;
108 {
109 std::unique_lock lock{completed_mutex};
110 results = std::move(finished_work);
111 finished_work.clear();
112 }
113 return results;
114}
115
116void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
117 Tegra::Engines::ShaderType shader_type, u64 uid,
118 std::vector<u64> code, std::vector<u64> code_b,
119 u32 main_offset, CompilerSettings compiler_settings,
120 const Registry& registry, VAddr cpu_addr) {
121 std::unique_lock lock(queue_mutex);
122 pending_queue.push({
123 .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
124 .device = &device,
125 .shader_type = shader_type,
126 .uid = uid,
127 .code = std::move(code),
128 .code_b = std::move(code_b),
129 .main_offset = main_offset,
130 .compiler_settings = compiler_settings,
131 .registry = registry,
132 .cpu_address = cpu_addr,
133 .pp_cache = nullptr,
134 .vk_device = nullptr,
135 .scheduler = nullptr,
136 .descriptor_pool = nullptr,
137 .update_descriptor_queue = nullptr,
138 .bindings{},
139 .program{},
140 .key{},
141 .num_color_buffers = 0,
142 });
143 cv.notify_one();
144}
145
146void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
147 const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
148 Vulkan::VKDescriptorPool& descriptor_pool,
149 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
150 std::vector<VkDescriptorSetLayoutBinding> bindings,
151 Vulkan::SPIRVProgram program,
152 Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
153 std::unique_lock lock(queue_mutex);
154 pending_queue.push({
155 .backend = Backend::Vulkan,
156 .device = nullptr,
157 .shader_type{},
158 .uid = 0,
159 .code{},
160 .code_b{},
161 .main_offset = 0,
162 .compiler_settings{},
163 .registry{},
164 .cpu_address = 0,
165 .pp_cache = pp_cache,
166 .vk_device = &device,
167 .scheduler = &scheduler,
168 .descriptor_pool = &descriptor_pool,
169 .update_descriptor_queue = &update_descriptor_queue,
170 .bindings = std::move(bindings),
171 .program = std::move(program),
172 .key = key,
173 .num_color_buffers = num_color_buffers,
174 });
175 cv.notify_one();
176}
177
178void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
179 while (!is_thread_exiting.load(std::memory_order_relaxed)) {
180 std::unique_lock lock{queue_mutex};
181 cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
182 if (is_thread_exiting) {
183 return;
184 }
185
186 // Re-check the queue while holding the lock; the wait may have returned spuriously
187 if (!HasWorkQueued()) {
188 continue;
189 }
190 // Another thread beat us, just unlock and wait for the next load
191 if (pending_queue.empty()) {
192 continue;
193 }
194
195 // Pull work from queue
196 WorkerParams work = std::move(pending_queue.front());
197 pending_queue.pop();
198 lock.unlock();
199
200 if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
201 const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
202 const auto scope = context->Acquire();
203 auto program =
204 OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
205 Result result{};
206 result.backend = work.backend;
207 result.cpu_address = work.cpu_address;
208 result.uid = work.uid;
209 result.code = std::move(work.code);
210 result.code_b = std::move(work.code_b);
211 result.shader_type = work.shader_type;
212
213 if (work.backend == Backend::OpenGL) {
214 result.program.opengl = std::move(program->source_program);
215 } else if (work.backend == Backend::GLASM) {
216 result.program.glasm = std::move(program->assembly_program);
217 }
218
219 {
220 std::unique_lock complete_lock(completed_mutex);
221 finished_work.push_back(std::move(result));
222 }
223 } else if (work.backend == Backend::Vulkan) {
224 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
225 *work.vk_device, *work.scheduler, *work.descriptor_pool,
226 *work.update_descriptor_queue, work.key, work.bindings, work.program,
227 work.num_color_buffers);
228
229 work.pp_cache->EmplacePipeline(std::move(pipeline));
230 }
231 }
232}
233
234} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
deleted file mode 100644
index 7fdff6e56..000000000
--- a/src/video_core/shader/async_shaders.h
+++ /dev/null
@@ -1,138 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <condition_variable>
8#include <memory>
9#include <shared_mutex>
10#include <thread>
11
12#include <glad/glad.h>
13
14#include "common/common_types.h"
15#include "video_core/renderer_opengl/gl_device.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17#include "video_core/renderer_opengl/gl_shader_decompiler.h"
18#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/vulkan_common/vulkan_device.h"
21
22namespace Core::Frontend {
23class EmuWindow;
24class GraphicsContext;
25} // namespace Core::Frontend
26
27namespace Tegra {
28class GPU;
29}
30
31namespace Vulkan {
32class VKPipelineCache;
33}
34
35namespace VideoCommon::Shader {
36
37class AsyncShaders {
38public:
39 enum class Backend {
40 OpenGL,
41 GLASM,
42 Vulkan,
43 };
44
45 struct ResultPrograms {
46 OpenGL::OGLProgram opengl;
47 OpenGL::OGLAssemblyProgram glasm;
48 };
49
50 struct Result {
51 u64 uid;
52 VAddr cpu_address;
53 Backend backend;
54 ResultPrograms program;
55 std::vector<u64> code;
56 std::vector<u64> code_b;
57 Tegra::Engines::ShaderType shader_type;
58 };
59
60 explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
61 ~AsyncShaders();
62
63 /// Start up shader worker threads
64 void AllocateWorkers();
65
66 /// Clear the shader queue and kill all worker threads
67 void FreeWorkers();
68
69 /// Force end all threads
70 void KillWorkers();
71
72 /// Check to see if any shaders have actually been compiled
73 [[nodiscard]] bool HasCompletedWork() const;
74
75 /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build
76 /// every shader async as some shaders are only built and executed once. We try to "guess" which
77 /// shaders would be used only once.
78 [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
79
80 /// Pulls completed compiled shaders
81 [[nodiscard]] std::vector<Result> GetCompletedWork();
82
83 void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
84 u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
85 CompilerSettings compiler_settings, const Registry& registry,
86 VAddr cpu_addr);
87
88 void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
89 Vulkan::VKScheduler& scheduler,
90 Vulkan::VKDescriptorPool& descriptor_pool,
91 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
92 std::vector<VkDescriptorSetLayoutBinding> bindings,
93 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
94 u32 num_color_buffers);
95
96private:
97 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
98
99 /// Check our worker queue to see if we have any work queued already
100 [[nodiscard]] bool HasWorkQueued() const;
101
102 struct WorkerParams {
103 Backend backend;
104 // For OGL
105 const OpenGL::Device* device;
106 Tegra::Engines::ShaderType shader_type;
107 u64 uid;
108 std::vector<u64> code;
109 std::vector<u64> code_b;
110 u32 main_offset;
111 CompilerSettings compiler_settings;
112 std::optional<Registry> registry;
113 VAddr cpu_address;
114
115 // For Vulkan
116 Vulkan::VKPipelineCache* pp_cache;
117 const Vulkan::Device* vk_device;
118 Vulkan::VKScheduler* scheduler;
119 Vulkan::VKDescriptorPool* descriptor_pool;
120 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
121 std::vector<VkDescriptorSetLayoutBinding> bindings;
122 Vulkan::SPIRVProgram program;
123 Vulkan::GraphicsPipelineCacheKey key;
124 u32 num_color_buffers;
125 };
126
127 std::condition_variable cv;
128 mutable std::mutex queue_mutex;
129 mutable std::shared_mutex completed_mutex;
130 std::atomic<bool> is_thread_exiting{};
131 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
132 std::vector<std::thread> worker_threads;
133 std::queue<WorkerParams> pending_queue;
134 std::vector<Result> finished_work;
135 Core::Frontend::EmuWindow& emu_window;
136};
137
138} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
deleted file mode 100644
index cddcbd4f0..000000000
--- a/src/video_core/shader/compiler_settings.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/shader/compiler_settings.h"
6
7namespace VideoCommon::Shader {
8
9std::string CompileDepthAsString(const CompileDepth cd) {
10 switch (cd) {
11 case CompileDepth::BruteForce:
12 return "Brute Force Compile";
13 case CompileDepth::FlowStack:
14 return "Simple Flow Stack Mode";
15 case CompileDepth::NoFlowStack:
16 return "Remove Flow Stack";
17 case CompileDepth::DecompileBackwards:
18 return "Decompile Backward Jumps";
19 case CompileDepth::FullDecompile:
20 return "Full Decompilation";
21 default:
22 return "Unknown Compiler Process";
23 }
24}
25
26} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
deleted file mode 100644
index 916018c01..000000000
--- a/src/video_core/shader/compiler_settings.h
+++ /dev/null
@@ -1,26 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/shader_bytecode.h"
8
9namespace VideoCommon::Shader {
10
11enum class CompileDepth : u32 {
12 BruteForce = 0,
13 FlowStack = 1,
14 NoFlowStack = 2,
15 DecompileBackwards = 3,
16 FullDecompile = 4,
17};
18
19std::string CompileDepthAsString(CompileDepth cd);
20
21struct CompilerSettings {
22 CompileDepth depth{CompileDepth::NoFlowStack};
23 bool disable_else_derivation{true};
24};
25
26} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
deleted file mode 100644
index 43d965f2f..000000000
--- a/src/video_core/shader/control_flow.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <map>
7#include <set>
8#include <stack>
9#include <unordered_map>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/memory_util.h"
17#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h"
19
20namespace VideoCommon::Shader {
21
22namespace {
23
24using Tegra::Shader::Instruction;
25using Tegra::Shader::OpCode;
26
27constexpr s32 unassigned_branch = -2;
28
29struct Query {
30 u32 address{};
31 std::stack<u32> ssy_stack{};
32 std::stack<u32> pbk_stack{};
33};
34
35struct BlockStack {
36 BlockStack() = default;
37 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
38 std::stack<u32> ssy_stack{};
39 std::stack<u32> pbk_stack{};
40};
41
42template <typename T, typename... Args>
43BlockBranchInfo MakeBranchInfo(Args&&... args) {
44 static_assert(std::is_convertible_v<T, BranchData>);
45 return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
46}
47
48bool BlockBranchIsIgnored(BlockBranchInfo first) {
49 bool ignore = false;
50 if (std::holds_alternative<SingleBranch>(*first)) {
51 const auto branch = std::get_if<SingleBranch>(first.get());
52 ignore = branch->ignore;
53 }
54 return ignore;
55}
56
57struct BlockInfo {
58 u32 start{};
59 u32 end{};
60 bool visited{};
61 BlockBranchInfo branch{};
62
63 bool IsInside(const u32 address) const {
64 return start <= address && address <= end;
65 }
66};
67
68struct CFGRebuildState {
69 explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
70 : program_code{program_code_}, registry{registry_}, start{start_} {}
71
72 const ProgramCode& program_code;
73 Registry& registry;
74 u32 start{};
75 std::vector<BlockInfo> block_info;
76 std::list<u32> inspect_queries;
77 std::list<Query> queries;
78 std::unordered_map<u32, u32> registered;
79 std::set<u32> labels;
80 std::map<u32, u32> ssy_labels;
81 std::map<u32, u32> pbk_labels;
82 std::unordered_map<u32, BlockStack> stacks;
83 ASTManager* manager{};
84};
85
86enum class BlockCollision : u32 { None, Found, Inside };
87
88std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
89 const auto& blocks = state.block_info;
90 for (u32 index = 0; index < blocks.size(); index++) {
91 if (blocks[index].start == address) {
92 return {BlockCollision::Found, index};
93 }
94 if (blocks[index].IsInside(address)) {
95 return {BlockCollision::Inside, index};
96 }
97 }
98 return {BlockCollision::None, 0xFFFFFFFF};
99}
100
101struct ParseInfo {
102 BlockBranchInfo branch_info{};
103 u32 end_address{};
104};
105
106BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
107 auto& it = state.block_info.emplace_back();
108 it.start = start;
109 it.end = end;
110 const u32 index = static_cast<u32>(state.block_info.size() - 1);
111 state.registered.insert({start, index});
112 return it;
113}
114
115Pred GetPredicate(u32 index, bool negated) {
116 return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
117}
118
119enum class ParseResult : u32 {
120 ControlCaught,
121 BlockEnd,
122 AbnormalFlow,
123};
124
125struct BranchIndirectInfo {
126 u32 buffer{};
127 u32 offset{};
128 u32 entries{};
129 s32 relative_position{};
130};
131
132struct BufferInfo {
133 u32 index;
134 u32 offset;
135};
136
137std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
138 const Instruction instr = state.program_code[pos];
139 const auto opcode = OpCode::Decode(instr);
140 if (opcode->get().GetId() != OpCode::Id::BRX) {
141 return std::nullopt;
142 }
143 if (instr.brx.constant_buffer != 0) {
144 return std::nullopt;
145 }
146 --pos;
147 return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
148}
149
150template <typename Result, typename TestCallable, typename PackCallable>
151// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
152// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
153std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
154 PackCallable pack) {
155 for (; pos >= state.start; --pos) {
156 if (IsSchedInstruction(pos, state.start)) {
157 continue;
158 }
159 const Instruction instr = state.program_code[pos];
160 const auto opcode = OpCode::Decode(instr);
161 if (!opcode) {
162 continue;
163 }
164 if (test(instr, opcode->get())) {
165 --pos;
166 return std::make_optional(pack(instr, opcode->get()));
167 }
168 }
169 return std::nullopt;
170}
171
172std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
173 u64 brx_tracked_register) {
174 return TrackInstruction<std::pair<BufferInfo, u64>>(
175 state, pos,
176 [brx_tracked_register](auto instr, const auto& opcode) {
177 return opcode.GetId() == OpCode::Id::LD_C &&
178 instr.gpr0.Value() == brx_tracked_register &&
179 instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
180 },
181 [](auto instr, const auto& opcode) {
182 const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
183 static_cast<u32>(instr.cbuf36.GetOffset())};
184 return std::make_pair(info, instr.gpr8.Value());
185 });
186}
187
188std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
189 u64 ldc_tracked_register) {
190 return TrackInstruction<u64>(
191 state, pos,
192 [ldc_tracked_register](auto instr, const auto& opcode) {
193 return opcode.GetId() == OpCode::Id::SHL_IMM &&
194 instr.gpr0.Value() == ldc_tracked_register;
195 },
196 [](auto instr, const auto&) { return instr.gpr8.Value(); });
197}
198
199std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
200 u64 shl_tracked_register) {
201 return TrackInstruction<u32>(
202 state, pos,
203 [shl_tracked_register](auto instr, const auto& opcode) {
204 return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
205 instr.gpr0.Value() == shl_tracked_register;
206 },
207 [](auto instr, const auto&) {
208 return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
209 });
210}
211
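// Walks backwards from a BRX instruction to recover its branch table: the LD_C that
// loaded the target, the SHL_IMM that scaled the index, and the IMNMX_IMM that bounds
// the number of entries, using the trackers defined above.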
212std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
213 const auto brx_info = GetBRXInfo(state, pos);
214 if (!brx_info) {
215 return std::nullopt;
216 }
217 const auto [relative_position, brx_tracked_register] = *brx_info;
218
219 const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
220 if (!ldc_info) {
221 return std::nullopt;
222 }
223 const auto [buffer_info, ldc_tracked_register] = *ldc_info;
224
225 const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
226 if (!shl_tracked_register) {
227 return std::nullopt;
228 }
229
230 const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
231 if (!entries) {
232 return std::nullopt;
233 }
234
235 return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
236}
237
238std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
239 u32 offset = static_cast<u32>(address);
240 const u32 end_address = static_cast<u32>(state.program_code.size());
241 ParseInfo parse_info{};
242 SingleBranch single_branch{};
243
244 const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
245 const auto pair = rebuild_state.labels.emplace(label_address);
246 if (pair.second) {
247 rebuild_state.inspect_queries.push_back(label_address);
248 }
249 };
250
251 while (true) {
252 if (offset >= end_address) {
253 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
254 ASSERT_MSG(false, "Shader passed the current limit!");
255
256 single_branch.address = exit_branch;
257 single_branch.ignore = false;
258 break;
259 }
260 if (state.registered.contains(offset)) {
261 single_branch.address = offset;
262 single_branch.ignore = true;
263 break;
264 }
265 if (IsSchedInstruction(offset, state.start)) {
266 offset++;
267 continue;
268 }
269 const Instruction instr = {state.program_code[offset]};
270 const auto opcode = OpCode::Decode(instr);
271 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
272 offset++;
273 continue;
274 }
275
276 switch (opcode->get().GetId()) {
277 case OpCode::Id::EXIT: {
278 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
279 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
280 if (single_branch.condition.predicate == Pred::NeverExecute) {
281 offset++;
282 continue;
283 }
284 const ConditionCode cc = instr.flow_condition_code;
285 single_branch.condition.cc = cc;
286 if (cc == ConditionCode::F) {
287 offset++;
288 continue;
289 }
290 single_branch.address = exit_branch;
291 single_branch.kill = false;
292 single_branch.is_sync = false;
293 single_branch.is_brk = false;
294 single_branch.ignore = false;
295 parse_info.end_address = offset;
296 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
297 single_branch.condition, single_branch.address, single_branch.kill,
298 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
299
300 return {ParseResult::ControlCaught, parse_info};
301 }
302 case OpCode::Id::BRA: {
303 if (instr.bra.constant_buffer != 0) {
304 return {ParseResult::AbnormalFlow, parse_info};
305 }
306 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
307 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
308 if (single_branch.condition.predicate == Pred::NeverExecute) {
309 offset++;
310 continue;
311 }
312 const ConditionCode cc = instr.flow_condition_code;
313 single_branch.condition.cc = cc;
314 if (cc == ConditionCode::F) {
315 offset++;
316 continue;
317 }
318 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
319 if (branch_offset == 0) {
320 single_branch.address = exit_branch;
321 } else {
322 single_branch.address = branch_offset;
323 }
324 insert_label(state, branch_offset);
325 single_branch.kill = false;
326 single_branch.is_sync = false;
327 single_branch.is_brk = false;
328 single_branch.ignore = false;
329 parse_info.end_address = offset;
330 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
331 single_branch.condition, single_branch.address, single_branch.kill,
332 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
333
334 return {ParseResult::ControlCaught, parse_info};
335 }
336 case OpCode::Id::SYNC: {
337 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
338 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
339 if (single_branch.condition.predicate == Pred::NeverExecute) {
340 offset++;
341 continue;
342 }
343 const ConditionCode cc = instr.flow_condition_code;
344 single_branch.condition.cc = cc;
345 if (cc == ConditionCode::F) {
346 offset++;
347 continue;
348 }
349 single_branch.address = unassigned_branch;
350 single_branch.kill = false;
351 single_branch.is_sync = true;
352 single_branch.is_brk = false;
353 single_branch.ignore = false;
354 parse_info.end_address = offset;
355 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
356 single_branch.condition, single_branch.address, single_branch.kill,
357 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
358
359 return {ParseResult::ControlCaught, parse_info};
360 }
361 case OpCode::Id::BRK: {
362 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
363 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
364 if (single_branch.condition.predicate == Pred::NeverExecute) {
365 offset++;
366 continue;
367 }
368 const ConditionCode cc = instr.flow_condition_code;
369 single_branch.condition.cc = cc;
370 if (cc == ConditionCode::F) {
371 offset++;
372 continue;
373 }
374 single_branch.address = unassigned_branch;
375 single_branch.kill = false;
376 single_branch.is_sync = false;
377 single_branch.is_brk = true;
378 single_branch.ignore = false;
379 parse_info.end_address = offset;
380 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
381 single_branch.condition, single_branch.address, single_branch.kill,
382 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
383
384 return {ParseResult::ControlCaught, parse_info};
385 }
386 case OpCode::Id::KIL: {
387 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
388 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
389 if (single_branch.condition.predicate == Pred::NeverExecute) {
390 offset++;
391 continue;
392 }
393 const ConditionCode cc = instr.flow_condition_code;
394 single_branch.condition.cc = cc;
395 if (cc == ConditionCode::F) {
396 offset++;
397 continue;
398 }
399 single_branch.address = exit_branch;
400 single_branch.kill = true;
401 single_branch.is_sync = false;
402 single_branch.is_brk = false;
403 single_branch.ignore = false;
404 parse_info.end_address = offset;
405 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
406 single_branch.condition, single_branch.address, single_branch.kill,
407 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
408
409 return {ParseResult::ControlCaught, parse_info};
410 }
411 case OpCode::Id::SSY: {
412 const u32 target = offset + instr.bra.GetBranchTarget();
413 insert_label(state, target);
414 state.ssy_labels.emplace(offset, target);
415 break;
416 }
417 case OpCode::Id::PBK: {
418 const u32 target = offset + instr.bra.GetBranchTarget();
419 insert_label(state, target);
420 state.pbk_labels.emplace(offset, target);
421 break;
422 }
423 case OpCode::Id::BRX: {
424 const auto tmp = TrackBranchIndirectInfo(state, offset);
425 if (!tmp) {
426 LOG_WARNING(HW_GPU, "BRX Track Unsuccessful");
427 return {ParseResult::AbnormalFlow, parse_info};
428 }
429
430 const auto result = *tmp;
431 const s32 pc_target = offset + result.relative_position;
432 std::vector<CaseBranch> branches;
433 for (u32 i = 0; i < result.entries; i++) {
434 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
435 if (!key) {
436 return {ParseResult::AbnormalFlow, parse_info};
437 }
438 u32 value = *key;
439 u32 target = static_cast<u32>((value >> 3) + pc_target);
440 insert_label(state, target);
441 branches.emplace_back(value, target);
442 }
443 parse_info.end_address = offset;
444 parse_info.branch_info = MakeBranchInfo<MultiBranch>(
445 static_cast<u32>(instr.gpr8.Value()), std::move(branches));
446
447 return {ParseResult::ControlCaught, parse_info};
448 }
449 default:
450 break;
451 }
452
453 offset++;
454 }
455 single_branch.kill = false;
456 single_branch.is_sync = false;
457 single_branch.is_brk = false;
458 parse_info.end_address = offset - 1;
459 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
460 single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
461 single_branch.is_brk, single_branch.ignore);
462 return {ParseResult::BlockEnd, parse_info};
463}
464
465bool TryInspectAddress(CFGRebuildState& state) {
466 if (state.inspect_queries.empty()) {
467 return false;
468 }
469
470 const u32 address = state.inspect_queries.front();
471 state.inspect_queries.pop_front();
472 const auto [result, block_index] = TryGetBlock(state, address);
473 switch (result) {
474 case BlockCollision::Found: {
475 return true;
476 }
477 case BlockCollision::Inside: {
478 // This case is the tricky one:
479 // We need to split the block into 2 separate blocks
480 const u32 end = state.block_info[block_index].end;
481 BlockInfo& new_block = CreateBlockInfo(state, address, end);
482 BlockInfo& current_block = state.block_info[block_index];
483 current_block.end = address - 1;
484 new_block.branch = std::move(current_block.branch);
485 BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
486 const auto branch = std::get_if<SingleBranch>(forward_branch.get());
487 branch->address = address;
488 branch->ignore = true;
489 current_block.branch = std::move(forward_branch);
490 return true;
491 }
492 default:
493 break;
494 }
495 const auto [parse_result, parse_info] = ParseCode(state, address);
496 if (parse_result == ParseResult::AbnormalFlow) {
497 // AbnormalFlow aborts the CFG reconstruction, so report failure here
498 return false;
499 }
500
501 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
502 block_info.branch = parse_info.branch_info;
503 if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
504 const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
505 if (branch->condition.IsUnconditional()) {
506 return true;
507 }
508 const u32 fallthrough_address = parse_info.end_address + 1;
509 state.inspect_queries.push_front(fallthrough_address);
510 return true;
511 }
512 return true;
513}
514
515bool TryQuery(CFGRebuildState& state) {
516 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
517 BlockInfo& block) {
518 auto gather_start = labels.lower_bound(block.start);
519 const auto gather_end = labels.upper_bound(block.end);
520 while (gather_start != gather_end) {
521 cc.push(gather_start->second);
522 ++gather_start;
523 }
524 };
525 if (state.queries.empty()) {
526 return false;
527 }
528
529 Query& q = state.queries.front();
530 const u32 block_index = state.registered[q.address];
531 BlockInfo& block = state.block_info[block_index];
532 // If the block is visited, check if the stacks match; otherwise gather the ssy/pbk
533 // labels into the current stack and check whether the branch at the end of the block
534 // consumes a label. Schedule new queries accordingly.
535 if (block.visited) {
536 BlockStack& stack = state.stacks[q.address];
537 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
538 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
539 state.queries.pop_front();
540 return all_okay;
541 }
542 block.visited = true;
543 state.stacks.insert_or_assign(q.address, BlockStack{q});
544
545 Query q2(q);
546 state.queries.pop_front();
547 gather_labels(q2.ssy_stack, state.ssy_labels, block);
548 gather_labels(q2.pbk_stack, state.pbk_labels, block);
549 if (std::holds_alternative<SingleBranch>(*block.branch)) {
550 auto* branch = std::get_if<SingleBranch>(block.branch.get());
551 if (!branch->condition.IsUnconditional()) {
552 q2.address = block.end + 1;
553 state.queries.push_back(q2);
554 }
555
556 auto& conditional_query = state.queries.emplace_back(q2);
557 if (branch->is_sync) {
558 if (branch->address == unassigned_branch) {
559 branch->address = conditional_query.ssy_stack.top();
560 }
561 conditional_query.ssy_stack.pop();
562 }
563 if (branch->is_brk) {
564 if (branch->address == unassigned_branch) {
565 branch->address = conditional_query.pbk_stack.top();
566 }
567 conditional_query.pbk_stack.pop();
568 }
569 conditional_query.address = branch->address;
570 return true;
571 }
572
573 const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
574 for (const auto& branch_case : multi_branch->branches) {
575 auto& conditional_query = state.queries.emplace_back(q2);
576 conditional_query.address = branch_case.address;
577 }
578
579 return true;
580}
581
582void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
583 const auto get_expr = [](const Condition& cond) -> Expr {
584 Expr result;
585 if (cond.cc != ConditionCode::T) {
586 result = MakeExpr<ExprCondCode>(cond.cc);
587 }
588 if (cond.predicate != Pred::UnusedIndex) {
589 u32 pred = static_cast<u32>(cond.predicate);
590 bool negate = false;
591 if (pred > 7) {
592 negate = true;
593 pred -= 8;
594 }
595 Expr extra = MakeExpr<ExprPredicate>(pred);
596 if (negate) {
597 extra = MakeExpr<ExprNot>(std::move(extra));
598 }
599 if (result) {
600 return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
601 }
602 return extra;
603 }
604 if (result) {
605 return result;
606 }
607 return MakeExpr<ExprBoolean>(true);
608 };
609
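// get_expr above combines the condition code and predicate (predicates >= 8 are the
// negated forms) into a single Expr, defaulting to `true` for unconditional branches.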
610 if (std::holds_alternative<SingleBranch>(*branch_info)) {
611 const auto* branch = std::get_if<SingleBranch>(branch_info.get());
612 if (branch->address < 0) {
613 if (branch->kill) {
614 mm.InsertReturn(get_expr(branch->condition), true);
615 return;
616 }
617 mm.InsertReturn(get_expr(branch->condition), false);
618 return;
619 }
620 mm.InsertGoto(get_expr(branch->condition), branch->address);
621 return;
622 }
623 const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
624 for (const auto& branch_case : multi_branch->branches) {
625 mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
626 branch_case.address);
627 }
628}
629
630void DecompileShader(CFGRebuildState& state) {
631 state.manager->Init();
632 for (auto label : state.labels) {
633 state.manager->DeclareLabel(label);
634 }
635 for (const auto& block : state.block_info) {
636 if (state.labels.contains(block.start)) {
637 state.manager->InsertLabel(block.start);
638 }
639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 const u32 end = ignore ? block.end + 1 : block.end;
641 state.manager->InsertBlock(block.start, end);
642 if (!ignore) {
643 InsertBranch(*state.manager, block.branch);
644 }
645 }
646 state.manager->Decompile();
647}
648
649} // Anonymous namespace
650
651std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
652 const CompilerSettings& settings,
653 Registry& registry) {
654 auto result_out = std::make_unique<ShaderCharacteristics>();
655 if (settings.depth == CompileDepth::BruteForce) {
656 result_out->settings.depth = CompileDepth::BruteForce;
657 return result_out;
658 }
659
660 CFGRebuildState state{program_code, start_address, registry};
661 // Inspect Code and generate blocks
662 state.labels.clear();
663 state.labels.emplace(start_address);
664 state.inspect_queries.push_back(state.start);
665 while (!state.inspect_queries.empty()) {
666 if (!TryInspectAddress(state)) {
667 result_out->settings.depth = CompileDepth::BruteForce;
668 return result_out;
669 }
670 }
671
672 bool use_flow_stack = true;
673
674 bool decompiled = false;
675
676 if (settings.depth != CompileDepth::FlowStack) {
677 // Decompile Stacks
678 state.queries.push_back(Query{state.start, {}, {}});
679 decompiled = true;
680 while (!state.queries.empty()) {
681 if (!TryQuery(state)) {
682 decompiled = false;
683 break;
684 }
685 }
686 }
687
688 use_flow_stack = !decompiled;
689
690 // Sort and organize results
691 std::sort(state.block_info.begin(), state.block_info.end(),
692 [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
693 if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
694 ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
695 settings.disable_else_derivation};
696 state.manager = &manager;
697 DecompileShader(state);
698 decompiled = state.manager->IsFullyDecompiled();
699 if (!decompiled) {
700 if (settings.depth == CompileDepth::FullDecompile) {
701 LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
702 } else {
703 LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
704 }
705 state.manager->ShowCurrentState("Of Shader");
706 state.manager->Clear();
707 } else {
708 auto characteristics = std::make_unique<ShaderCharacteristics>();
709 characteristics->start = start_address;
710 characteristics->settings.depth = settings.depth;
711 characteristics->manager = std::move(manager);
712 characteristics->end = state.block_info.back().end + 1;
713 return characteristics;
714 }
715 }
716
717 result_out->start = start_address;
718 result_out->settings.depth =
719 use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
720 result_out->blocks.clear();
721 for (auto& block : state.block_info) {
722 ShaderBlock new_block{};
723 new_block.start = block.start;
724 new_block.end = block.end;
725 new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
726 if (!new_block.ignore_branch) {
727 new_block.branch = block.branch;
728 }
729 result_out->end = std::max(result_out->end, block.end);
730 result_out->blocks.push_back(new_block);
731 }
732 if (!use_flow_stack) {
733 result_out->labels = std::move(state.labels);
734 return result_out;
735 }
736
737 auto back = result_out->blocks.begin();
738 auto next = std::next(back);
739 while (next != result_out->blocks.end()) {
740 if (!state.labels.contains(next->start) && next->start == back->end + 1) {
741 back->end = next->end;
742 next = result_out->blocks.erase(next);
743 continue;
744 }
745 back = next;
746 ++next;
747 }
748
749 return result_out;
750}
751} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
deleted file mode 100644
index 37bf96492..000000000
--- a/src/video_core/shader/control_flow.h
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <list>
8#include <optional>
9#include <set>
10#include <variant>
11
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::ConditionCode;
21using Tegra::Shader::Pred;
22
23constexpr s32 exit_branch = -1;
24
25struct Condition {
26 Pred predicate{Pred::UnusedIndex};
27 ConditionCode cc{ConditionCode::T};
28
29 bool IsUnconditional() const {
30 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
31 }
32
33 bool operator==(const Condition& other) const {
34 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
35 }
36
37 bool operator!=(const Condition& other) const {
38 return !operator==(other);
39 }
40};
41
42class SingleBranch {
43public:
44 SingleBranch() = default;
45 explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
46 bool is_brk_, bool ignore_)
47 : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
48 ignore{ignore_} {}
49
50 bool operator==(const SingleBranch& b) const {
51 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
52 std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
53 }
54
55 bool operator!=(const SingleBranch& b) const {
56 return !operator==(b);
57 }
58
59 Condition condition{};
60 s32 address{exit_branch};
61 bool kill{};
62 bool is_sync{};
63 bool is_brk{};
64 bool ignore{};
65};
66
67struct CaseBranch {
68 explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
69 u32 cmp_value;
70 u32 address;
71};
72
73class MultiBranch {
74public:
75 explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
76 : gpr{gpr_}, branches{std::move(branches_)} {}
77
78 u32 gpr{};
79 std::vector<CaseBranch> branches{};
80};
81
82using BranchData = std::variant<SingleBranch, MultiBranch>;
83using BlockBranchInfo = std::shared_ptr<BranchData>;
84
85bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
86
87struct ShaderBlock {
88 u32 start{};
89 u32 end{};
90 bool ignore_branch{};
91 BlockBranchInfo branch{};
92
93 bool operator==(const ShaderBlock& sb) const {
94 return std::tie(start, end, ignore_branch) ==
95 std::tie(sb.start, sb.end, sb.ignore_branch) &&
96 BlockBranchInfoAreEqual(branch, sb.branch);
97 }
98
99 bool operator!=(const ShaderBlock& sb) const {
100 return !operator==(sb);
101 }
102};
103
104struct ShaderCharacteristics {
105 std::list<ShaderBlock> blocks{};
106 std::set<u32> labels{};
107 u32 start{};
108 u32 end{};
109 ASTManager manager{true, true};
110 CompilerSettings settings{};
111};
112
113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
114 const CompilerSettings& settings,
115 Registry& registry);
116
117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
deleted file mode 100644
index 6576d1208..000000000
--- a/src/video_core/shader/decode.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <limits>
7#include <set>
8
9#include <fmt/format.h>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/engines/shader_header.h"
15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/memory_util.h"
17#include "video_core/shader/node_helper.h"
18#include "video_core/shader/shader_ir.h"
19
20namespace VideoCommon::Shader {
21
22using Tegra::Shader::Instruction;
23using Tegra::Shader::OpCode;
24
25namespace {
26
27void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
28 const std::list<SamplerEntry>& used_samplers) {
29 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
30 return;
31 }
32 u32 count{};
33 std::vector<u32> bound_offsets;
34 for (const auto& sampler : used_samplers) {
35 if (sampler.is_bindless) {
36 continue;
37 }
38 ++count;
39 bound_offsets.emplace_back(sampler.offset);
40 }
41 if (count > 1) {
42 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
43 }
44}
45
46std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
47 VideoCore::GuestDriverProfile& gpu_driver,
48 const std::list<SamplerEntry>& used_samplers) {
49 const u32 base_offset = sampler_to_deduce.offset;
50 u32 max_offset{std::numeric_limits<u32>::max()};
51 for (const auto& sampler : used_samplers) {
52 if (sampler.is_bindless) {
53 continue;
54 }
55 if (sampler.offset > base_offset) {
56 max_offset = std::min(sampler.offset, max_offset);
57 }
58 }
59 if (max_offset == std::numeric_limits<u32>::max()) {
60 return std::nullopt;
61 }
62 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
63}
64
65} // Anonymous namespace
66
67class ASTDecoder {
68public:
69 explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
70
71 void operator()(ASTProgram& ast) {
72 ASTNode current = ast.nodes.GetFirst();
73 while (current) {
74 Visit(current);
75 current = current->GetNext();
76 }
77 }
78
79 void operator()(ASTIfThen& ast) {
80 ASTNode current = ast.nodes.GetFirst();
81 while (current) {
82 Visit(current);
83 current = current->GetNext();
84 }
85 }
86
87 void operator()(ASTIfElse& ast) {
88 ASTNode current = ast.nodes.GetFirst();
89 while (current) {
90 Visit(current);
91 current = current->GetNext();
92 }
93 }
94
95 void operator()(ASTBlockEncoded& ast) {}
96
97 void operator()(ASTBlockDecoded& ast) {}
98
99 void operator()(ASTVarSet& ast) {}
100
101 void operator()(ASTLabel& ast) {}
102
103 void operator()(ASTGoto& ast) {}
104
105 void operator()(ASTDoWhile& ast) {
106 ASTNode current = ast.nodes.GetFirst();
107 while (current) {
108 Visit(current);
109 current = current->GetNext();
110 }
111 }
112
113 void operator()(ASTReturn& ast) {}
114
115 void operator()(ASTBreak& ast) {}
116
117 void Visit(ASTNode& node) {
118 std::visit(*this, *node->GetInnerData());
119 if (node->IsBlockEncoded()) {
120 auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
121 NodeBlock bb = ir.DecodeRange(block->start, block->end);
122 node->TransformBlockEncoded(std::move(bb));
123 }
124 }
125
126private:
127 ShaderIR& ir;
128};
129
130void ShaderIR::Decode() {
131 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
132
133 decompiled = false;
134 auto info = ScanFlow(program_code, main_offset, settings, registry);
135 auto& shader_info = *info;
136 coverage_begin = shader_info.start;
137 coverage_end = shader_info.end;
138 switch (shader_info.settings.depth) {
139 case CompileDepth::FlowStack: {
140 for (const auto& block : shader_info.blocks) {
141 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
142 }
143 break;
144 }
145 case CompileDepth::NoFlowStack: {
146 disable_flow_stack = true;
147 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
148 if (label == static_cast<u32>(exit_branch)) {
149 return;
150 }
151 basic_blocks.insert({label, nodes});
152 };
153 const auto& blocks = shader_info.blocks;
154 NodeBlock current_block;
155 u32 current_label = static_cast<u32>(exit_branch);
156 for (const auto& block : blocks) {
157 if (shader_info.labels.contains(block.start)) {
158 insert_block(current_block, current_label);
159 current_block.clear();
160 current_label = block.start;
161 }
162 if (!block.ignore_branch) {
163 DecodeRangeInner(current_block, block.start, block.end);
164 InsertControlFlow(current_block, block);
165 } else {
166 DecodeRangeInner(current_block, block.start, block.end + 1);
167 }
168 }
169 insert_block(current_block, current_label);
170 break;
171 }
172 case CompileDepth::DecompileBackwards:
173 case CompileDepth::FullDecompile: {
174 program_manager = std::move(shader_info.manager);
175 disable_flow_stack = true;
176 decompiled = true;
177 ASTDecoder decoder{*this};
178 ASTNode program = GetASTProgram();
179 decoder.Visit(program);
180 break;
181 }
182 default:
183 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
184 [[fallthrough]];
185 case CompileDepth::BruteForce: {
186 const auto shader_end = static_cast<u32>(program_code.size());
187 coverage_begin = main_offset;
188 coverage_end = shader_end;
189 for (u32 label = main_offset; label < shader_end; ++label) {
190 basic_blocks.insert({label, DecodeRange(label, label + 1)});
191 }
192 break;
193 }
194 }
195 if (settings.depth != shader_info.settings.depth) {
196 LOG_WARNING(
197            HW_GPU, "Decompiling with setting \"{}\" failed, downgrading to setting \"{}\"",
198 CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
199 }
200}
201
202NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
203 NodeBlock basic_block;
204 DecodeRangeInner(basic_block, begin, end);
205 return basic_block;
206}
207
208void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
209 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
210 pc = DecodeInstr(bb, pc);
211 }
212}
213
214void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
215 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
216 Node result = n;
217 if (cond.cc != ConditionCode::T) {
218 result = Conditional(GetConditionCode(cond.cc), {result});
219 }
220 if (cond.predicate != Pred::UnusedIndex) {
221 u32 pred = static_cast<u32>(cond.predicate);
222 const bool is_neg = pred > 7;
223 if (is_neg) {
224 pred -= 8;
225 }
226 result = Conditional(GetPredicate(pred, is_neg), {result});
227 }
228 return result;
229 };
230 if (std::holds_alternative<SingleBranch>(*block.branch)) {
231 auto branch = std::get_if<SingleBranch>(block.branch.get());
232 if (branch->address < 0) {
233 if (branch->kill) {
234 Node n = Operation(OperationCode::Discard);
235 n = apply_conditions(branch->condition, n);
236 bb.push_back(n);
237 global_code.push_back(n);
238 return;
239 }
240 Node n = Operation(OperationCode::Exit);
241 n = apply_conditions(branch->condition, n);
242 bb.push_back(n);
243 global_code.push_back(n);
244 return;
245 }
246 Node n = Operation(OperationCode::Branch, Immediate(branch->address));
247 n = apply_conditions(branch->condition, n);
248 bb.push_back(n);
249 global_code.push_back(n);
250 return;
251 }
252 auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
253 Node op_a = GetRegister(multi_branch->gpr);
254 for (auto& branch_case : multi_branch->branches) {
255 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
256 Node op_b = Immediate(branch_case.cmp_value);
257 Node condition =
258 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
259 auto result = Conditional(condition, {n});
260 bb.push_back(result);
261 global_code.push_back(result);
262 }
263}
264
265u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
266 // Ignore sched instructions when generating code.
267 if (IsSchedInstruction(pc, main_offset)) {
268 return pc + 1;
269 }
270
271 const Instruction instr = {program_code[pc]};
272 const auto opcode = OpCode::Decode(instr);
273 const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
274
275 // Decoding failure
276 if (!opcode) {
277 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
278 bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
279 nv_address, instr.value)));
280 return pc + 1;
281 }
282
283 bb.push_back(Comment(
284 fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
285
286 using Tegra::Shader::Pred;
287 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
288 "NeverExecute predicate not implemented");
289
290 static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
291 {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
292 {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
293 {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
294 {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
295 {OpCode::Type::Shift, &ShaderIR::DecodeShift},
296 {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
297 {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
298 {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
299 {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
300 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
301 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
302 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
303 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
304 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
305 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
306 {OpCode::Type::Image, &ShaderIR::DecodeImage},
307 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
308 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
309 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
310 {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
311 {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
312 {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
313 {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
314 {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
315 {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
316 {OpCode::Type::Video, &ShaderIR::DecodeVideo},
317 {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
318 };
319
320 std::vector<Node> tmp_block;
321 if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
322 pc = (this->*decoder->second)(tmp_block, pc);
323 } else {
324 pc = DecodeOther(tmp_block, pc);
325 }
326
327    // Some instructions (like SSY) don't have a predicate field; they are always executed
328    // unconditionally.
329 const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
330 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
331
332 if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
333 const Node conditional =
334 Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
335 global_code.push_back(conditional);
336 bb.push_back(conditional);
337 } else {
338 for (auto& node : tmp_block) {
339 global_code.push_back(node);
340 bb.push_back(node);
341 }
342 }
343
344 return pc + 1;
345}
346
347void ShaderIR::PostDecode() {
348 // Deduce texture handler size if needed
349 auto gpu_driver = registry.AccessGuestDriverProfile();
350 DeduceTextureHandlerSize(gpu_driver, used_samplers);
351 // Deduce Indexed Samplers
352 if (!uses_indexed_samplers) {
353 return;
354 }
355 for (auto& sampler : used_samplers) {
356 if (!sampler.is_indexed) {
357 continue;
358 }
359 if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
360 sampler.size = *size;
361 } else {
362 LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
363 sampler.size = 1;
364 }
365 }
366}
367
368} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
deleted file mode 100644
index 15eb700e7..000000000
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::SubOp;
17
18u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 Node op_a = GetRegister(instr.gpr8);
23
24 Node op_b = [&] {
25 if (instr.is_b_imm) {
26 return GetImmediate19(instr);
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::MOV_C:
36 case OpCode::Id::MOV_R: {
37        // MOV has neither 'abs' nor 'neg' bits.
38 SetRegister(bb, instr.gpr0, op_b);
39 break;
40 }
41 case OpCode::Id::FMUL_C:
42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 if (instr.fmul.tab5cb8_2 != 0) {
46 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 instr.fmul.tab5cb8_2.Value());
48 }
49 if (instr.fmul.tab5c68_0 != 1) {
50            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
53
54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55
56 static constexpr std::array FmulPostFactor = {
57 1.000f, // None
58 0.500f, // Divide 2
59 0.250f, // Divide 4
60 0.125f, // Divide 8
61 8.000f, // Mul 8
62 4.000f, // Mul 4
63 2.000f, // Mul 2
64 };
65
66 if (instr.fmul.postfactor != 0) {
67 op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
68 Immediate(FmulPostFactor[instr.fmul.postfactor]));
69 }
70
71 // TODO(Rodrigo): Should precise be used when there's a postfactor?
72 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
73
74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
77 SetRegister(bb, instr.gpr0, value);
78 break;
79 }
80 case OpCode::Id::FADD_C:
81 case OpCode::Id::FADD_R:
82 case OpCode::Id::FADD_IMM: {
83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
85
86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
90 SetRegister(bb, instr.gpr0, value);
91 break;
92 }
93 case OpCode::Id::MUFU: {
94 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
95
96 Node value = [&]() {
97 switch (instr.sub_op) {
98 case SubOp::Cos:
99 return Operation(OperationCode::FCos, PRECISE, op_a);
100 case SubOp::Sin:
101 return Operation(OperationCode::FSin, PRECISE, op_a);
102 case SubOp::Ex2:
103 return Operation(OperationCode::FExp2, PRECISE, op_a);
104 case SubOp::Lg2:
105 return Operation(OperationCode::FLog2, PRECISE, op_a);
106 case SubOp::Rcp:
107 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
108 case SubOp::Rsq:
109 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
110 case SubOp::Sqrt:
111 return Operation(OperationCode::FSqrt, PRECISE, op_a);
112 default:
113 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
114 return Immediate(0);
115 }
116 }();
117 value = GetSaturatedFloat(value, instr.alu.saturate_d);
118
119 SetRegister(bb, instr.gpr0, value);
120 break;
121 }
122 case OpCode::Id::FMNMX_C:
123 case OpCode::Id::FMNMX_R:
124 case OpCode::Id::FMNMX_IMM: {
125 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
126 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
127
128 const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
129
130 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
131 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
132 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
133
134 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
135 SetRegister(bb, instr.gpr0, value);
136 break;
137 }
138 case OpCode::Id::FCMP_RR:
139 case OpCode::Id::FCMP_RC:
140 case OpCode::Id::FCMP_IMMR: {
141 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
142 Node op_c = GetRegister(instr.gpr39);
143 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
144 SetRegister(
145 bb, instr.gpr0,
146 Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
147 break;
148 }
149 case OpCode::Id::RRO_C:
150 case OpCode::Id::RRO_R:
151 case OpCode::Id::RRO_IMM: {
152 LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
153
154 // Currently RRO is only implemented as a register move.
155 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
156 SetRegister(bb, instr.gpr0, op_b);
157 break;
158 }
159 default:
160 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
161 }
162
163 return pc;
164}
165
166} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::HalfType;
15using Tegra::Shader::Instruction;
16using Tegra::Shader::OpCode;
17
18u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 bool negate_a = false;
23 bool negate_b = false;
24 bool absolute_a = false;
25 bool absolute_b = false;
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::HADD2_R:
29 if (instr.alu_half.ftz == 0) {
30 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
31 }
32 negate_a = ((instr.value >> 43) & 1) != 0;
33 negate_b = ((instr.value >> 31) & 1) != 0;
34 absolute_a = ((instr.value >> 44) & 1) != 0;
35 absolute_b = ((instr.value >> 30) & 1) != 0;
36 break;
37 case OpCode::Id::HADD2_C:
38 if (instr.alu_half.ftz == 0) {
39 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
40 }
41 negate_a = ((instr.value >> 43) & 1) != 0;
42 negate_b = ((instr.value >> 56) & 1) != 0;
43 absolute_a = ((instr.value >> 44) & 1) != 0;
44 absolute_b = ((instr.value >> 54) & 1) != 0;
45 break;
46 case OpCode::Id::HMUL2_R:
47 negate_a = ((instr.value >> 43) & 1) != 0;
48 absolute_a = ((instr.value >> 44) & 1) != 0;
49 absolute_b = ((instr.value >> 30) & 1) != 0;
50 break;
51 case OpCode::Id::HMUL2_C:
52 negate_b = ((instr.value >> 31) & 1) != 0;
53 absolute_a = ((instr.value >> 44) & 1) != 0;
54 absolute_b = ((instr.value >> 54) & 1) != 0;
55 break;
56 default:
57 UNREACHABLE();
58 break;
59 }
60
61 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
62 op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
63
64 auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
65 switch (opcode->get().GetId()) {
66 case OpCode::Id::HADD2_C:
67 case OpCode::Id::HMUL2_C:
68 return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
69 case OpCode::Id::HADD2_R:
70 case OpCode::Id::HMUL2_R:
71 return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
72 default:
73 UNREACHABLE();
74 return {HalfType::F32, Immediate(0)};
75 }
76 }();
77 op_b = UnpackHalfFloat(op_b, type_b);
78 op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
79
80 Node value = [this, opcode, op_a, op_b = op_b] {
81 switch (opcode->get().GetId()) {
82 case OpCode::Id::HADD2_C:
83 case OpCode::Id::HADD2_R:
84 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
85 case OpCode::Id::HMUL2_C:
86 case OpCode::Id::HMUL2_R:
87 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
88 default:
89 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
90 return Immediate(0);
91 }
92 }();
93 value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
94 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
95
96 SetRegister(bb, instr.gpr0, value);
97
98 return pc;
99}
100
101} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16
17u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
22 if (instr.alu_half_imm.ftz == 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25 } else {
26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
27 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
28 }
29 }
30
31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
32 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
33
34 const Node op_b = UnpackHalfImmediate(instr, true);
35
36 Node value = [&]() {
37 switch (opcode->get().GetId()) {
38 case OpCode::Id::HADD2_IMM:
39 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
40 case OpCode::Id::HMUL2_IMM:
41 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
42 default:
43 UNREACHABLE();
44 return Immediate(0);
45 }
46 }();
47
48 value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
49 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
50 SetRegister(bb, instr.gpr0, value);
51 return pc;
52}
53
54} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 switch (opcode->get().GetId()) {
21 case OpCode::Id::MOV32_IMM: {
22 SetRegister(bb, instr.gpr0, GetImmediate32(instr));
23 break;
24 }
25 case OpCode::Id::FMUL32_IMM: {
26 Node value =
27 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
28 value = GetSaturatedFloat(value, instr.fmul32.saturate);
29
30 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
31 SetRegister(bb, instr.gpr0, value);
32 break;
33 }
34 case OpCode::Id::FADD32I: {
35 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
36 instr.fadd32i.negate_a);
37 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
38 instr.fadd32i.negate_b);
39
40 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
41 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
42 SetRegister(bb, instr.gpr0, value);
43 break;
44 }
45 default:
46 UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
47 opcode->get().GetName());
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::IAdd3Height;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::Register;
18
19u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr);
22
23 Node op_a = GetRegister(instr.gpr8);
24 Node op_b = [&]() {
25 if (instr.is_b_imm) {
26 return Immediate(instr.alu.GetSignedImm20_20());
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::IADD_C:
36 case OpCode::Id::IADD_R:
37 case OpCode::Id::IADD_IMM: {
38 UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
39 UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
40
41 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
42 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
43
44 Node value = Operation(OperationCode::UAdd, op_a, op_b);
45
46 if (instr.iadd.x) {
47 Node carry = GetInternalFlag(InternalFlag::Carry);
48 Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
49 value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
50 }
51
52 if (instr.generates_cc) {
53 const Node i0 = Immediate(0);
54
55 Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
56 Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
57 Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
58
59 Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
60 Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
61 Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
62 Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
63
64 SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
65 SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
66 SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
67 SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
68 }
69 SetRegister(bb, instr.gpr0, std::move(value));
70 break;
71 }
72 case OpCode::Id::IADD3_C:
73 case OpCode::Id::IADD3_R:
74 case OpCode::Id::IADD3_IMM: {
75 Node op_c = GetRegister(instr.gpr39);
76
77 const auto ApplyHeight = [&](IAdd3Height height, Node value) {
78 switch (height) {
79 case IAdd3Height::None:
80 return value;
81 case IAdd3Height::LowerHalfWord:
82 return BitfieldExtract(value, 0, 16);
83 case IAdd3Height::UpperHalfWord:
84 return BitfieldExtract(value, 16, 16);
85 default:
86 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
87 return Immediate(0);
88 }
89 };
90
91 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
92 op_a = ApplyHeight(instr.iadd3.height_a, op_a);
93 op_b = ApplyHeight(instr.iadd3.height_b, op_b);
94 op_c = ApplyHeight(instr.iadd3.height_c, op_c);
95 }
96
97 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
98 op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
99 op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
100
101 const Node value = [&] {
102 Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
103 if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
104 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
105 }
106 const Node shifted = [&] {
107 switch (instr.iadd3.mode) {
108 case Tegra::Shader::IAdd3Mode::RightShift:
109 // TODO(tech4me): According to
110 // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
111                // the addition between op_a and op_b should be done in uint33; more
112                // investigation is required
113 return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
114 Immediate(16));
115 case Tegra::Shader::IAdd3Mode::LeftShift:
116 return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
117 Immediate(16));
118 default:
119 return add_ab;
120 }
121 }();
122 return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
123 }();
124
125 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
126 SetRegister(bb, instr.gpr0, value);
127 break;
128 }
129 case OpCode::Id::ISCADD_C:
130 case OpCode::Id::ISCADD_R:
131 case OpCode::Id::ISCADD_IMM: {
132 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
133                             "Condition code generation in ISCADD is not implemented");
134
135 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
136 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
137
138 const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
139 const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
140 const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
141
142 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
143 SetRegister(bb, instr.gpr0, value);
144 break;
145 }
146 case OpCode::Id::POPC_C:
147 case OpCode::Id::POPC_R:
148 case OpCode::Id::POPC_IMM: {
149 if (instr.popc.invert) {
150 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
151 }
152 const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
153 SetRegister(bb, instr.gpr0, value);
154 break;
155 }
156 case OpCode::Id::FLO_R:
157 case OpCode::Id::FLO_C:
158 case OpCode::Id::FLO_IMM: {
159 Node value;
160 if (instr.flo.invert) {
161 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
162 }
163 if (instr.flo.is_signed) {
164 value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
165 } else {
166 value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
167 }
168 if (instr.flo.sh) {
169 value =
170 Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
171 }
172 SetRegister(bb, instr.gpr0, std::move(value));
173 break;
174 }
175 case OpCode::Id::SEL_C:
176 case OpCode::Id::SEL_R:
177 case OpCode::Id::SEL_IMM: {
178 const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
179 const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
180 SetRegister(bb, instr.gpr0, value);
181 break;
182 }
183 case OpCode::Id::ICMP_CR:
184 case OpCode::Id::ICMP_R:
185 case OpCode::Id::ICMP_RC:
186 case OpCode::Id::ICMP_IMM: {
187 const Node zero = Immediate(0);
188
189 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
190 switch (opcode->get().GetId()) {
191 case OpCode::Id::ICMP_CR:
192 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
193 GetRegister(instr.gpr39)};
194 case OpCode::Id::ICMP_R:
195 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
196 case OpCode::Id::ICMP_RC:
197 return {GetRegister(instr.gpr39),
198 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
199 case OpCode::Id::ICMP_IMM:
200 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
201 default:
202 UNREACHABLE();
203 return {zero, zero};
204 }
205 }();
206 const Node op_lhs = GetRegister(instr.gpr8);
207 const Node comparison =
208 GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
209 SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
210 break;
211 }
212 case OpCode::Id::LOP_C:
213 case OpCode::Id::LOP_R:
214 case OpCode::Id::LOP_IMM: {
215 if (instr.alu.lop.invert_a)
216 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
217 if (instr.alu.lop.invert_b)
218 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
219
220 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
221 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
222 instr.generates_cc);
223 break;
224 }
225 case OpCode::Id::LOP3_C:
226 case OpCode::Id::LOP3_R:
227 case OpCode::Id::LOP3_IMM: {
228 const Node op_c = GetRegister(instr.gpr39);
229 const Node lut = [&]() {
230 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
231 return Immediate(instr.alu.lop3.GetImmLut28());
232 } else {
233 return Immediate(instr.alu.lop3.GetImmLut48());
234 }
235 }();
236
237 WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
238 break;
239 }
240 case OpCode::Id::IMNMX_C:
241 case OpCode::Id::IMNMX_R:
242 case OpCode::Id::IMNMX_IMM: {
243 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
244
245 const bool is_signed = instr.imnmx.is_signed;
246
247 const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
248 const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
249 const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
250 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
251
252 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
253 SetRegister(bb, instr.gpr0, value);
254 break;
255 }
256 case OpCode::Id::LEA_R2:
257 case OpCode::Id::LEA_R1:
258 case OpCode::Id::LEA_IMM:
259 case OpCode::Id::LEA_RZ:
260 case OpCode::Id::LEA_HI: {
261 auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
262 switch (opcode->get().GetId()) {
263 case OpCode::Id::LEA_R2: {
264 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
265 Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
266 }
267 case OpCode::Id::LEA_R1: {
268 const bool neg = instr.lea.r1.neg != 0;
269 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
270 GetRegister(instr.gpr20),
271 Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
272 }
273 case OpCode::Id::LEA_IMM: {
274 const bool neg = instr.lea.imm.neg != 0;
275 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
276 Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
277 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
278 }
279 case OpCode::Id::LEA_RZ: {
280 const bool neg = instr.lea.rz.neg != 0;
281 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
282 GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
283 Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
284 }
285 case OpCode::Id::LEA_HI:
286 default:
287 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
288
289 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
290 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
291 }
292 }();
293
294 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
295 "Unhandled LEA Predicate");
296
297 Node value =
298 Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
299 value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
300 SetRegister(bb, instr.gpr0, std::move(value));
301
302 break;
303 }
304 default:
305 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
306 }
307
308 return pc;
309}
310
311void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
312 Node imm_lut, bool sets_cc) {
313 const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
314 Node value = Immediate(0);
315 const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
316 if (imm.GetValue() & 0x01) {
317 const Node a = Operation(OperationCode::IBitwiseNot, na);
318 const Node b = Operation(OperationCode::IBitwiseNot, nb);
319 const Node c = Operation(OperationCode::IBitwiseNot, nc);
320 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
321 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
322 value = Operation(OperationCode::IBitwiseOr, value, r);
323 }
324 if (imm.GetValue() & 0x02) {
325 const Node a = Operation(OperationCode::IBitwiseNot, na);
326 const Node b = Operation(OperationCode::IBitwiseNot, nb);
327 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
328 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
329 value = Operation(OperationCode::IBitwiseOr, value, r);
330 }
331 if (imm.GetValue() & 0x04) {
332 const Node a = Operation(OperationCode::IBitwiseNot, na);
333 const Node c = Operation(OperationCode::IBitwiseNot, nc);
334 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
335 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
336 value = Operation(OperationCode::IBitwiseOr, value, r);
337 }
338 if (imm.GetValue() & 0x08) {
339 const Node a = Operation(OperationCode::IBitwiseNot, na);
340 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
341 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
342 value = Operation(OperationCode::IBitwiseOr, value, r);
343 }
344 if (imm.GetValue() & 0x10) {
345 const Node b = Operation(OperationCode::IBitwiseNot, nb);
346 const Node c = Operation(OperationCode::IBitwiseNot, nc);
347 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
348 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
349 value = Operation(OperationCode::IBitwiseOr, value, r);
350 }
351 if (imm.GetValue() & 0x20) {
352 const Node b = Operation(OperationCode::IBitwiseNot, nb);
353 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
354 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
355 value = Operation(OperationCode::IBitwiseOr, value, r);
356 }
357 if (imm.GetValue() & 0x40) {
358 const Node c = Operation(OperationCode::IBitwiseNot, nc);
359 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
360 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
361 value = Operation(OperationCode::IBitwiseOr, value, r);
362 }
363 if (imm.GetValue() & 0x80) {
364 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
365 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
366 value = Operation(OperationCode::IBitwiseOr, value, r);
367 }
368 return value;
369 }(op_a, op_b, op_c, imm_lut);
370
371 SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
372 SetRegister(bb, dest, lop3_fast);
373}
374
375} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::LogicOperation;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::PredicateResultMode;
18using Tegra::Shader::Register;
19
20u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 Node op_a = GetRegister(instr.gpr8);
25 Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::IADD32I: {
29 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
30
31 op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
32
33 Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
34
35 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
36 SetRegister(bb, instr.gpr0, std::move(value));
37 break;
38 }
39 case OpCode::Id::LOP32I: {
40 if (instr.alu.lop32i.invert_a) {
41 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
42 }
43
44 if (instr.alu.lop32i.invert_b) {
45 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
46 }
47
48 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
49 std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
50 instr.op_32.generates_cc != 0);
51 break;
52 }
53 default:
54 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
55 opcode->get().GetName());
56 }
57
58 return pc;
59}
60
61void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
62 Node op_b, PredicateResultMode predicate_mode, Pred predicate,
63 bool sets_cc) {
64 Node result = [&] {
65 switch (logic_op) {
66 case LogicOperation::And:
67 return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
68 case LogicOperation::Or:
69 return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
70 case LogicOperation::Xor:
71 return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
72 case LogicOperation::PassB:
73 return op_b;
74 default:
75 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
76 return Immediate(0);
77 }
78 }();
79
80 SetInternalFlagsFromInteger(bb, result, sets_cc);
81 SetRegister(bb, dest, result);
82
83 // Write the predicate value depending on the predicate mode.
84 switch (predicate_mode) {
85 case PredicateResultMode::None:
86 // Do nothing.
87 return;
88 case PredicateResultMode::NotZero: {
89 // Set the predicate to true if the result is not zero.
90 Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
91 SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
92 break;
93 }
94 default:
95 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
96 }
97}
98
99} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 Node op_a = GetRegister(instr.gpr8);
21 Node op_b = [&] {
22 switch (opcode->get().GetId()) {
23 case OpCode::Id::BFE_R:
24 return GetRegister(instr.gpr20);
25 case OpCode::Id::BFE_C:
26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
34
35    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented");
36
37 const bool is_signed = instr.bfe.is_signed;
38
39    // Uses the reverse parallel method from
40    // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41    // Note for later: a faster method may be possible.
42 if (instr.bfe.brev) {
43 const auto swap = [&](u32 s, u32 mask) {
44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
65 }
66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
74 return pc;
75}
76
77} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::BFI_RC:
23 return {GetRegister(instr.gpr39),
24 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
25 case OpCode::Id::BFI_IMM_R:
26 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
27 default:
28 UNREACHABLE();
29 return {Immediate(0), Immediate(0)};
30 }
31 }();
32 const Node insert = GetRegister(instr.gpr8);
33 const Node offset = BitfieldExtract(packed_shift, 0, 8);
34 const Node bits = BitfieldExtract(packed_shift, 8, 8);
35
36 const Node value =
37 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
38
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 SetRegister(bb, instr.gpr0, value);
41
42 return pc;
43}
44
45} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6#include <optional>
7#include <utility>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/node_helper.h"
13#include "video_core/shader/shader_ir.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register;
20
21namespace {
22
23constexpr OperationCode GetFloatSelector(u64 selector) {
24 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
25}
26
27constexpr u32 SizeInBits(Register::Size size) {
28 switch (size) {
29 case Register::Size::Byte:
30 return 8;
31 case Register::Size::Short:
32 return 16;
33 case Register::Size::Word:
34 return 32;
35 case Register::Size::Long:
36 return 64;
37 }
38 return 0;
39}
40
41constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
42 Register::Size dst_size,
43 bool src_signed,
44 bool dst_signed) {
45 const u32 dst_bits = SizeInBits(dst_size);
46 if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
47 if (src_signed == dst_signed) {
48 return std::nullopt;
49 }
50 return std::make_pair(0, std::numeric_limits<s32>::max());
51 }
52 if (dst_signed) {
53 // Signed destination, clamp to [-128, 127] for instance
54 return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
55 } else {
56 // Unsigned destination
57 if (dst_bits == 32) {
58                // Avoid shifting by 32, which is undefined behavior
59 return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
60 }
61 return std::make_pair(0, (1 << dst_bits) - 1);
62 }
63}
64
65} // Anonymous namespace
66
67u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
68 const Instruction instr = {program_code[pc]};
69 const auto opcode = OpCode::Decode(instr);
70
71 switch (opcode->get().GetId()) {
72 case OpCode::Id::I2I_R:
73 case OpCode::Id::I2I_C:
74 case OpCode::Id::I2I_IMM: {
75 const bool src_signed = instr.conversion.is_input_signed;
76 const bool dst_signed = instr.conversion.is_output_signed;
77 const Register::Size src_size = instr.conversion.src_size;
78 const Register::Size dst_size = instr.conversion.dst_size;
79 const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
80
81 Node value = [this, instr, opcode] {
82 switch (opcode->get().GetId()) {
83 case OpCode::Id::I2I_R:
84 return GetRegister(instr.gpr20);
85 case OpCode::Id::I2I_C:
86 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
87 case OpCode::Id::I2I_IMM:
88 return Immediate(instr.alu.GetSignedImm20_20());
89 default:
90 UNREACHABLE();
91 return Immediate(0);
92 }
93 }();
94
95 // Ensure the source selector is valid
96 switch (instr.conversion.src_size) {
97 case Register::Size::Byte:
98 break;
99 case Register::Size::Short:
100 ASSERT(selector == 0 || selector == 2);
101 break;
102 default:
103 ASSERT(selector == 0);
104 break;
105 }
106
107 if (src_size != Register::Size::Word || selector != 0) {
108 value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
109 Immediate(selector * 8), Immediate(SizeInBits(src_size)));
110 }
111
112 value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
113 instr.conversion.negate_a, src_signed);
114
115 if (instr.alu.saturate_d) {
116 if (src_signed && !dst_signed) {
117 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
118 Immediate(1 << (SizeInBits(src_size) - 1)));
119 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
120 std::move(value));
121
122                // Simplify generated expressions; this can be removed without semantic impact
123 SetTemporary(bb, 0, std::move(value));
124 value = GetTemporary(0);
125
126 if (dst_size != Register::Size::Word) {
127 const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
128 Node is_large =
129 Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
130 value = Operation(OperationCode::Select, std::move(is_large), limit,
131 std::move(value));
132 }
133 } else if (const std::optional bounds =
134 IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
135 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
136 Immediate(bounds->first));
137 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
138 Immediate(bounds->second));
139 }
140 } else if (dst_size != Register::Size::Word) {
141            // No saturation; we only have to mask the result
142 Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
143 value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
144 }
145
146 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
147 SetRegister(bb, instr.gpr0, std::move(value));
148 break;
149 }
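// A note on the signed-to-unsigned saturation above: LogicalUGreaterEqual compares the
// value against 1 << (src_bits - 1) as unsigned, so any negative input (sign bit set)
// trips the comparison and is clamped to 0 by the Select before the upper bound applies.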
150 case OpCode::Id::I2F_R:
151 case OpCode::Id::I2F_C:
152 case OpCode::Id::I2F_IMM: {
153 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
154 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
155 "Condition codes generation in I2F is not implemented");
156
157 Node value = [&] {
158 switch (opcode->get().GetId()) {
159 case OpCode::Id::I2F_R:
160 return GetRegister(instr.gpr20);
161 case OpCode::Id::I2F_C:
162 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
163 case OpCode::Id::I2F_IMM:
164 return Immediate(instr.alu.GetSignedImm20_20());
165 default:
166 UNREACHABLE();
167 return Immediate(0);
168 }
169 }();
170
171 const bool input_signed = instr.conversion.is_input_signed;
172
173 if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
174 ASSERT(instr.conversion.src_size == Register::Size::Byte ||
175 instr.conversion.src_size == Register::Size::Short);
176 if (instr.conversion.src_size == Register::Size::Short) {
177 ASSERT(offset == 0 || offset == 2);
178 }
179 value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
180 std::move(value), Immediate(offset * 8));
181 }
182
183 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
184 value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
185 value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
186 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
187
188 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
189
190 if (instr.conversion.dst_size == Register::Size::Short) {
191 value = Operation(OperationCode::HCastFloat, PRECISE, value);
192 }
193
194 SetRegister(bb, instr.gpr0, value);
195 break;
196 }
197 case OpCode::Id::F2F_R:
198 case OpCode::Id::F2F_C:
199 case OpCode::Id::F2F_IMM: {
200 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
201 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
202 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
203 "Condition codes generation in F2F is not implemented");
204
205 Node value = [&]() {
206 switch (opcode->get().GetId()) {
207 case OpCode::Id::F2F_R:
208 return GetRegister(instr.gpr20);
209 case OpCode::Id::F2F_C:
210 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
211 case OpCode::Id::F2F_IMM:
212 return GetImmediate19(instr);
213 default:
214 UNREACHABLE();
215 return Immediate(0);
216 }
217 }();
218
219 if (instr.conversion.src_size == Register::Size::Short) {
220 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
221 std::move(value));
222 } else {
223 ASSERT(instr.conversion.float_src.selector == 0);
224 }
225
226 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
227
228 value = [&] {
229 if (instr.conversion.src_size != instr.conversion.dst_size) {
230                // Rounding operations only apply when the source and destination
231                // sizes are the same.
232 return value;
233 }
234 switch (instr.conversion.f2f.GetRoundingMode()) {
235 case Tegra::Shader::F2fRoundingOp::None:
236 return value;
237 case Tegra::Shader::F2fRoundingOp::Round:
238 return Operation(OperationCode::FRoundEven, value);
239 case Tegra::Shader::F2fRoundingOp::Floor:
240 return Operation(OperationCode::FFloor, value);
241 case Tegra::Shader::F2fRoundingOp::Ceil:
242 return Operation(OperationCode::FCeil, value);
243 case Tegra::Shader::F2fRoundingOp::Trunc:
244 return Operation(OperationCode::FTrunc, value);
245 default:
246 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
247 instr.conversion.f2f.rounding.Value());
248 return value;
249 }
250 }();
251 value = GetSaturatedFloat(value, instr.alu.saturate_d);
252
253 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
254
255 if (instr.conversion.dst_size == Register::Size::Short) {
256 value = Operation(OperationCode::HCastFloat, PRECISE, value);
257 }
258
259 SetRegister(bb, instr.gpr0, value);
260 break;
261 }
262 case OpCode::Id::F2I_R:
263 case OpCode::Id::F2I_C:
264 case OpCode::Id::F2I_IMM: {
265 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
266 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
267 "Condition codes generation in F2I is not implemented");
268 Node value = [&]() {
269 switch (opcode->get().GetId()) {
270 case OpCode::Id::F2I_R:
271 return GetRegister(instr.gpr20);
272 case OpCode::Id::F2I_C:
273 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
274 case OpCode::Id::F2I_IMM:
275 return GetImmediate19(instr);
276 default:
277 UNREACHABLE();
278 return Immediate(0);
279 }
280 }();
281
282 if (instr.conversion.src_size == Register::Size::Short) {
283 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
284 std::move(value));
285 } else {
286 ASSERT(instr.conversion.float_src.selector == 0);
287 }
288
289 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
290
291 value = [&]() {
292 switch (instr.conversion.f2i.rounding) {
293 case Tegra::Shader::F2iRoundingOp::RoundEven:
294 return Operation(OperationCode::FRoundEven, PRECISE, value);
295 case Tegra::Shader::F2iRoundingOp::Floor:
296 return Operation(OperationCode::FFloor, PRECISE, value);
297 case Tegra::Shader::F2iRoundingOp::Ceil:
298 return Operation(OperationCode::FCeil, PRECISE, value);
299 case Tegra::Shader::F2iRoundingOp::Trunc:
300 return Operation(OperationCode::FTrunc, PRECISE, value);
301 default:
302 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
303 instr.conversion.f2i.rounding.Value());
304 return Immediate(0);
305 }
306 }();
307 const bool is_signed = instr.conversion.is_output_signed;
308 value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
309 value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
310
311 SetRegister(bb, instr.gpr0, value);
312 break;
313 }
314 default:
315 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
316 }
317
318 return pc;
319}
320
321} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 if (instr.ffma.tab5980_0 != 1) {
22 LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 }
24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
27
28 const Node op_a = GetRegister(instr.gpr8);
29
30 auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
31 switch (opcode->get().GetId()) {
32 case OpCode::Id::FFMA_CR: {
33 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 GetRegister(instr.gpr39)};
35 }
36 case OpCode::Id::FFMA_RR:
37 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
38 case OpCode::Id::FFMA_RC: {
39 return {GetRegister(instr.gpr39),
40 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
41 }
42 case OpCode::Id::FFMA_IMM:
43 return {GetImmediate19(instr), GetRegister(instr.gpr39)};
44 default:
45 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
46 return {Immediate(0), Immediate(0)};
47 }
48 }();
49
50 op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
51 op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
52
53 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
54 value = GetSaturatedFloat(value, instr.alu.saturate_d);
55
56 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
57 SetRegister(bb, instr.gpr0, value);
58
59 return pc;
60}
61
62} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
20 instr.fset.neg_a != 0);
21
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
33
34    // The fset instruction sets a register to 1.0 (if the bf bit is set) or to -1 (all bits
35    // set) when the condition is true, and to 0 otherwise.
36 const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
37
38 const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
39 const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
40
41 const Node predicate = Operation(combiner, first_pred, second_pred);
42
43 const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
44 const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
45 const Node value =
46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
47
48 if (instr.fset.bf) {
49 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
50 } else {
51 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
52 }
53 SetRegister(bb, instr.gpr0, value);
54
55 return pc;
56}
57
58} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
21 instr.fsetp.neg_a != 0);
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31 op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
32
33 // We can't use the constant predicate as destination.
34 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
35
36 const Node predicate =
37 GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
39
40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42
43 // Set the primary predicate to the result of Predicate OP SecondPredicate
44 SetPredicate(bb, instr.fsetp.pred3, value);
45
46 if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
47 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
48 // if enabled
49 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
50 const Node second_value = Operation(combiner, negated_pred, second_pred);
51 SetPredicate(bb, instr.fsetp.pred0, second_value);
52 }
53
54 return pc;
55}
56
57} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using std::move;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
20
21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr);
24
25 PredCondition cond{};
26 bool bf = false;
27 bool ftz = false;
28 bool neg_a = false;
29 bool abs_a = false;
30 bool neg_b = false;
31 bool abs_b = false;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
54 }
55
56 Node op_b = [this, instr, opcode] {
57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59            // Flag as unimplemented because this constant-buffer path is untested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
62 case OpCode::Id::HSET2_R:
63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
66 default:
67 UNREACHABLE();
68 return Node{};
69 }
70 }();
71
72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
89
90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
93
94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
95
96 // HSET2 operates on each half float in the pack.
97 std::array<Node, 2> values;
98 for (u32 i = 0; i < 2; ++i) {
99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
100 Node true_value = Immediate(raw_value << (i * 16));
101 Node false_value = Immediate(0);
102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
105 values[i] =
106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
107 }
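// Here 0x3c00 is 1.0 encoded as an IEEE 754 half float, so a set bf bit produces 1.0h per
// enabled half while a clear bf bit produces the all-ones mask 0xffff; the shift by i * 16
// places each result in the low or high half before both are OR'd together below.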
108
109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
110 SetRegister(bb, instr.gpr0, move(value));
111
112 return pc;
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17
18u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 if (instr.hsetp2.ftz != 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25
26 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
27 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
28
29 Tegra::Shader::PredCondition cond{};
30 bool h_and{};
31 Node op_b{};
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSETP2_C:
34 cond = instr.hsetp2.cbuf_and_imm.cond;
35 h_and = instr.hsetp2.cbuf_and_imm.h_and;
36 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
37 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
38 // F32 is hardcoded in hardware
39 op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
40 break;
41 case OpCode::Id::HSETP2_IMM:
42 cond = instr.hsetp2.cbuf_and_imm.cond;
43 h_and = instr.hsetp2.cbuf_and_imm.h_and;
44 op_b = UnpackHalfImmediate(instr, true);
45 break;
46 case OpCode::Id::HSETP2_R:
47 cond = instr.hsetp2.reg.cond;
48 h_and = instr.hsetp2.reg.h_and;
49 op_b =
50 GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
51 instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
52 break;
53 default:
54 UNREACHABLE();
55 op_b = Immediate(0);
56 }
57
58 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
59 const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
60
61 const auto Write = [&](u64 dest, Node src) {
62 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
63 };
64
65 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
66 const u64 first = instr.hsetp2.pred3;
67 const u64 second = instr.hsetp2.pred0;
68 if (h_and) {
69 Node joined = Operation(OperationCode::LogicalAnd2, comparison);
70 Write(first, joined);
71 Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
72 } else {
73 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
74 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
75 }
76
77 return pc;
78}
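// In the h_and case the per-half comparison pair is collapsed with LogicalAnd2 into a single
// boolean written to the first predicate (its negation to the second); otherwise LogicalPick2
// routes the low and high half results to the two predicates independently.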
79
80} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
deleted file mode 100644
index 5b44cb79c..000000000
--- a/src/video_core/shader/decode/hfma2.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Shader::HalfPrecision;
16using Tegra::Shader::HalfType;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19
20u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else {
27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 }
29
30 constexpr auto identity = HalfType::H0_H1;
31 bool neg_b{}, neg_c{};
32 auto [saturate, type_b, op_b, type_c,
33 op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::HFMA2_CR:
36 neg_b = instr.hfma2.negate_b;
37 neg_c = instr.hfma2.negate_c;
38 return {instr.hfma2.saturate, HalfType::F32,
39 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
40 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
41 case OpCode::Id::HFMA2_RC:
42 neg_b = instr.hfma2.negate_b;
43 neg_c = instr.hfma2.negate_c;
44 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
45 HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
46 case OpCode::Id::HFMA2_RR:
47 neg_b = instr.hfma2.rr.negate_b;
48 neg_c = instr.hfma2.rr.negate_c;
49 return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
50 instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
51 case OpCode::Id::HFMA2_IMM_R:
52 neg_c = instr.hfma2.negate_c;
53 return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
54 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
55 default:
56 return {false, identity, Immediate(0), identity, Immediate(0)};
57 }
58 }();
59
60 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
61 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
62 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
63
64 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
65 value = GetSaturatedHalfFloat(value, saturate);
66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
67
68 SetRegister(bb, instr.gpr0, value);
69
70 return pc;
71}
72
73} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/textures/texture.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::PredCondition;
23using Tegra::Shader::StoreType;
24using Tegra::Texture::ComponentType;
25using Tegra::Texture::TextureFormat;
26using Tegra::Texture::TICEntry;
27
28namespace {
29
30ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
31 std::size_t component) {
32 const TextureFormat format{descriptor.format};
33 switch (format) {
34 case TextureFormat::R16G16B16A16:
35 case TextureFormat::R32G32B32A32:
36 case TextureFormat::R32G32B32:
37 case TextureFormat::R32G32:
38 case TextureFormat::R16G16:
39 case TextureFormat::R32:
40 case TextureFormat::R16:
41 case TextureFormat::R8:
42 case TextureFormat::R1:
43 if (component == 0) {
44 return descriptor.r_type;
45 }
46 if (component == 1) {
47 return descriptor.g_type;
48 }
49 if (component == 2) {
50 return descriptor.b_type;
51 }
52 if (component == 3) {
53 return descriptor.a_type;
54 }
55 break;
56 case TextureFormat::A8R8G8B8:
57 if (component == 0) {
58 return descriptor.a_type;
59 }
60 if (component == 1) {
61 return descriptor.r_type;
62 }
63 if (component == 2) {
64 return descriptor.g_type;
65 }
66 if (component == 3) {
67 return descriptor.b_type;
68 }
69 break;
70 case TextureFormat::A2B10G10R10:
71 case TextureFormat::A4B4G4R4:
72 case TextureFormat::A5B5G5R1:
73 case TextureFormat::A1B5G5R5:
74 if (component == 0) {
75 return descriptor.a_type;
76 }
77 if (component == 1) {
78 return descriptor.b_type;
79 }
80 if (component == 2) {
81 return descriptor.g_type;
82 }
83 if (component == 3) {
84 return descriptor.r_type;
85 }
86 break;
87 case TextureFormat::R32_B24G8:
88 if (component == 0) {
89 return descriptor.r_type;
90 }
91 if (component == 1) {
92 return descriptor.b_type;
93 }
94 if (component == 2) {
95 return descriptor.g_type;
96 }
97 break;
98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5:
100 case TextureFormat::B10G11R11:
101 if (component == 0) {
102 return descriptor.b_type;
103 }
104 if (component == 1) {
105 return descriptor.g_type;
106 }
107 if (component == 2) {
108 return descriptor.r_type;
109 }
110 break;
111 case TextureFormat::R24G8:
112 case TextureFormat::R8G24:
113 case TextureFormat::R8G8:
114 case TextureFormat::G4R4:
115 if (component == 0) {
116 return descriptor.g_type;
117 }
118 if (component == 1) {
119 return descriptor.r_type;
120 }
121 break;
122 default:
123 break;
124 }
125 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
126 return ComponentType::FLOAT;
127}
128
129bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
130 constexpr u8 R = 0b0001;
131 constexpr u8 G = 0b0010;
132 constexpr u8 B = 0b0100;
133 constexpr u8 A = 0b1000;
134 constexpr std::array<u8, 16> mask = {
135 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
136 (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
137 return std::bitset<4>{mask.at(component_mask)}.test(component);
138}
139
140u32 GetComponentSize(TextureFormat format, std::size_t component) {
141 switch (format) {
142 case TextureFormat::R32G32B32A32:
143 return 32;
144 case TextureFormat::R16G16B16A16:
145 return 16;
146 case TextureFormat::R32G32B32:
147 return component <= 2 ? 32 : 0;
148 case TextureFormat::R32G32:
149 return component <= 1 ? 32 : 0;
150 case TextureFormat::R16G16:
151 return component <= 1 ? 16 : 0;
152 case TextureFormat::R32:
153 return component == 0 ? 32 : 0;
154 case TextureFormat::R16:
155 return component == 0 ? 16 : 0;
156 case TextureFormat::R8:
157 return component == 0 ? 8 : 0;
158 case TextureFormat::R1:
159 return component == 0 ? 1 : 0;
160 case TextureFormat::A8R8G8B8:
161 return 8;
162 case TextureFormat::A2B10G10R10:
163 return (component == 3 || component == 2 || component == 1) ? 10 : 2;
164 case TextureFormat::A4B4G4R4:
165 return 4;
166 case TextureFormat::A5B5G5R1:
167 return (component == 0 || component == 1 || component == 2) ? 5 : 1;
168 case TextureFormat::A1B5G5R5:
169 return (component == 1 || component == 2 || component == 3) ? 5 : 1;
170 case TextureFormat::R32_B24G8:
171 if (component == 0) {
172 return 32;
173 }
174 if (component == 1) {
175 return 24;
176 }
177 if (component == 2) {
178 return 8;
179 }
180 return 0;
181 case TextureFormat::B5G6R5:
182 if (component == 0 || component == 2) {
183 return 5;
184 }
185 if (component == 1) {
186 return 6;
187 }
188 return 0;
189 case TextureFormat::B6G5R5:
190 if (component == 1 || component == 2) {
191 return 5;
192 }
193 if (component == 0) {
194 return 6;
195 }
196 return 0;
197 case TextureFormat::B10G11R11:
198 if (component == 1 || component == 2) {
199 return 11;
200 }
201 if (component == 0) {
202 return 10;
203 }
204 return 0;
205 case TextureFormat::R24G8:
206 if (component == 0) {
207 return 8;
208 }
209 if (component == 1) {
210 return 24;
211 }
212 return 0;
213 case TextureFormat::R8G24:
214 if (component == 0) {
215 return 24;
216 }
217 if (component == 1) {
218 return 8;
219 }
220 return 0;
221 case TextureFormat::R8G8:
222 return (component == 0 || component == 1) ? 8 : 0;
223 case TextureFormat::G4R4:
224 return (component == 0 || component == 1) ? 4 : 0;
225 default:
226 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
227 return 0;
228 }
229}
230
231std::size_t GetImageComponentMask(TextureFormat format) {
232 constexpr u8 R = 0b0001;
233 constexpr u8 G = 0b0010;
234 constexpr u8 B = 0b0100;
235 constexpr u8 A = 0b1000;
236 switch (format) {
237 case TextureFormat::R32G32B32A32:
238 case TextureFormat::R16G16B16A16:
239 case TextureFormat::A8R8G8B8:
240 case TextureFormat::A2B10G10R10:
241 case TextureFormat::A4B4G4R4:
242 case TextureFormat::A5B5G5R1:
243 case TextureFormat::A1B5G5R5:
244 return std::size_t{R | G | B | A};
245 case TextureFormat::R32G32B32:
246 case TextureFormat::R32_B24G8:
247 case TextureFormat::B5G6R5:
248 case TextureFormat::B6G5R5:
249 case TextureFormat::B10G11R11:
250 return std::size_t{R | G | B};
251 case TextureFormat::R32G32:
252 case TextureFormat::R16G16:
253 case TextureFormat::R24G8:
254 case TextureFormat::R8G24:
255 case TextureFormat::R8G8:
256 case TextureFormat::G4R4:
257 return std::size_t{R | G};
258 case TextureFormat::R32:
259 case TextureFormat::R16:
260 case TextureFormat::R8:
261 case TextureFormat::R1:
262 return std::size_t{R};
263 default:
264 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
265 return std::size_t{R | G | B | A};
266 }
267}
268
269std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
270 switch (image_type) {
271 case Tegra::Shader::ImageType::Texture1D:
272 case Tegra::Shader::ImageType::TextureBuffer:
273 return 1;
274 case Tegra::Shader::ImageType::Texture1DArray:
275 case Tegra::Shader::ImageType::Texture2D:
276 return 2;
277 case Tegra::Shader::ImageType::Texture2DArray:
278 case Tegra::Shader::ImageType::Texture3D:
279 return 3;
280 }
281 UNREACHABLE();
282 return 1;
283}
284} // Anonymous namespace
285
286std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
287 Node original_value) {
288 switch (component_type) {
289 case ComponentType::SNORM: {
290 // range [-1.0, 1.0]
291 auto cnv_value = Operation(OperationCode::FMul, original_value,
292 Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
293 cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
294 return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
295 }
296 case ComponentType::SINT:
297 case ComponentType::UNORM: {
298 bool is_signed = component_type == ComponentType::SINT;
299 // range [0.0, 1.0]
300 auto cnv_value = Operation(OperationCode::FMul, original_value,
301 Immediate(static_cast<float>(1 << component_size) - 1.f));
302 return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
303 is_signed};
304 }
305 case ComponentType::UINT: // range [0, (1 << component_size) - 1]
306 return {std::move(original_value), false};
307 case ComponentType::FLOAT:
308 if (component_size == 16) {
309 return {Operation(OperationCode::HCastFloat, original_value), true};
310 } else {
311 return {std::move(original_value), true};
312 }
313 default:
314 UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
315 return {std::move(original_value), true};
316 }
317}
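// For example, an 8-bit UNORM component is mapped back to integer form by multiplying the
// sampled float by (1 << 8) - 1 = 255 and casting, while an 8-bit SNORM component is scaled
// by (1 << 8) / 2 - 1 = 127, cast, and then truncated to its low 8 bits with BitfieldExtract.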
318
319u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
320 const Instruction instr = {program_code[pc]};
321 const auto opcode = OpCode::Decode(instr);
322
323 const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
324 std::vector<Node> coords;
325 const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
326 coords.reserve(num_coords);
327 for (std::size_t i = 0; i < num_coords; ++i) {
328 coords.push_back(GetRegister(instr.gpr8.Value() + i));
329 }
330 return coords;
331 };
332
333 switch (opcode->get().GetId()) {
334 case OpCode::Id::SULD: {
335 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
336 Tegra::Shader::OutOfBoundsStore::Ignore);
337
338 const auto type{instr.suldst.image_type};
339 auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
340 : GetBindlessImage(instr.gpr39, type)};
341 image.MarkRead();
342
343 if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
344 u32 indexer = 0;
345 for (u32 element = 0; element < 4; ++element) {
346 if (!instr.suldst.IsComponentEnabled(element)) {
347 continue;
348 }
349 MetaImage meta{image, {}, element};
350 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
351 SetTemporary(bb, indexer++, std::move(value));
352 }
353 for (u32 i = 0; i < indexer; ++i) {
354 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
355 }
356 } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
357 UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
358 instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
359
360 auto descriptor = [this, instr] {
361 std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
362 if (instr.suldst.is_immediate) {
363 sampler_descriptor =
364 registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
365 } else {
366 const Node image_register = GetRegister(instr.gpr39);
367 const auto result = TrackCbuf(image_register, global_code,
368 static_cast<s64>(global_code.size()));
369 const auto buffer = std::get<1>(result);
370 const auto offset = std::get<2>(result);
371 sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
372 }
373 if (!sampler_descriptor) {
374 UNREACHABLE_MSG("Failed to obtain image descriptor");
375 }
376 return *sampler_descriptor;
377 }();
378
379 const auto comp_mask = GetImageComponentMask(descriptor.format);
380
381 switch (instr.suldst.GetStoreDataLayout()) {
382 case StoreType::Bits32:
383 case StoreType::Bits64: {
384 u32 indexer = 0;
385 u32 shifted_counter = 0;
386 Node value = Immediate(0);
387 for (u32 element = 0; element < 4; ++element) {
388 if (!IsComponentEnabled(comp_mask, element)) {
389 continue;
390 }
391 const auto component_type = GetComponentType(descriptor, element);
392 const auto component_size = GetComponentSize(descriptor.format, element);
393 MetaImage meta{image, {}, element};
394
395 auto [converted_value, is_signed] = GetComponentValue(
396 component_type, component_size,
397 Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
398
399 // shift element to correct position
400 const auto shifted = shifted_counter;
401 if (shifted > 0) {
402 converted_value =
403 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
404 std::move(converted_value), Immediate(shifted));
405 }
406 shifted_counter += component_size;
407
408 // add value into result
409 value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
410
411                         // Once a full 32-bit word has been packed, save it into a temporary
412 if (shifted_counter >= 32) {
413 SetTemporary(bb, indexer++, std::move(value));
414                             // Reset the counter and value to start packing the next word
415 value = Immediate(0);
416 shifted_counter = 0;
417 }
418 }
419 for (u32 i = 0; i < indexer; ++i) {
420 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
421 }
422 break;
423 }
424 default:
425 UNREACHABLE();
426 break;
427 }
428 }
429 break;
430 }
431 case OpCode::Id::SUST: {
432 UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
433 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
434 Tegra::Shader::OutOfBoundsStore::Ignore);
435 UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
436
437 std::vector<Node> values;
438 constexpr std::size_t hardcoded_size{4};
439 for (std::size_t i = 0; i < hardcoded_size; ++i) {
440 values.push_back(GetRegister(instr.gpr0.Value() + i));
441 }
442
443 const auto type{instr.suldst.image_type};
444 auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
445 : GetBindlessImage(instr.gpr39, type)};
446 image.MarkWrite();
447
448 MetaImage meta{image, std::move(values)};
449 bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
450 break;
451 }
452 case OpCode::Id::SUATOM: {
453 UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
454
455 const OperationCode operation_code = [instr] {
456 switch (instr.suatom_d.operation_type) {
457 case Tegra::Shader::ImageAtomicOperationType::S32:
458 case Tegra::Shader::ImageAtomicOperationType::U32:
459 switch (instr.suatom_d.operation) {
460 case Tegra::Shader::ImageAtomicOperation::Add:
461 return OperationCode::AtomicImageAdd;
462 case Tegra::Shader::ImageAtomicOperation::And:
463 return OperationCode::AtomicImageAnd;
464 case Tegra::Shader::ImageAtomicOperation::Or:
465 return OperationCode::AtomicImageOr;
466 case Tegra::Shader::ImageAtomicOperation::Xor:
467 return OperationCode::AtomicImageXor;
468 case Tegra::Shader::ImageAtomicOperation::Exch:
469 return OperationCode::AtomicImageExchange;
470 default:
471 break;
472 }
473 break;
474 default:
475 break;
476 }
477 UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
478 static_cast<u64>(instr.suatom_d.operation.Value()),
479 static_cast<u64>(instr.suatom_d.operation_type.Value()));
480 return OperationCode::AtomicImageAdd;
481 }();
482
483 Node value = GetRegister(instr.gpr0);
484
485 const auto type = instr.suatom_d.image_type;
486 auto& image = GetImage(instr.image, type);
487 image.MarkAtomic();
488
489 MetaImage meta{image, {std::move(value)}};
490 SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
491 break;
492 }
493 default:
494 UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
495 }
496
497 return pc;
498}
499
500ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
501 const auto offset = static_cast<u32>(image.index.Value());
502
503 const auto it =
504 std::find_if(std::begin(used_images), std::end(used_images),
505 [offset](const ImageEntry& entry) { return entry.offset == offset; });
506 if (it != std::end(used_images)) {
507 ASSERT(!it->is_bindless && it->type == type);
508 return *it;
509 }
510
511 const auto next_index = static_cast<u32>(used_images.size());
512 return used_images.emplace_back(next_index, offset, type);
513}
514
515ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
516 const Node image_register = GetRegister(reg);
517 const auto result =
518 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
519
520 const auto buffer = std::get<1>(result);
521 const auto offset = std::get<2>(result);
522
523 const auto it = std::find_if(std::begin(used_images), std::end(used_images),
524 [buffer, offset](const ImageEntry& entry) {
525 return entry.buffer == buffer && entry.offset == offset;
526 });
527 if (it != std::end(used_images)) {
528 ASSERT(it->is_bindless && it->type == type);
529 return *it;
530 }
531
532 const auto next_index = static_cast<u32>(used_images.size());
533 return used_images.emplace_back(next_index, offset, buffer, type);
534}
535
536} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "video_core/engines/shader_bytecode.h"
7#include "video_core/shader/node_helper.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17
18 const Node op_a = GetRegister(instr.gpr8);
19 const Node op_b = [&]() {
20 if (instr.is_b_imm) {
21 return Immediate(instr.alu.GetSignedImm20_20());
22 } else if (instr.is_b_gpr) {
23 return GetRegister(instr.gpr20);
24 } else {
25 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
26 }
27 }();
28
29    // The iset instruction sets a register to 1.0 (if the bf bit is set) or to -1 (all bits
30    // set) when the condition is true, and to 0 otherwise.
31 const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
32 const Node first_pred =
33 GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
34
35 const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
36
37 const Node predicate = Operation(combiner, first_pred, second_pred);
38
39 const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
40 const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
41 const Node value =
42 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
43
44 SetRegister(bb, instr.gpr0, value);
45
46 return pc;
47}
48
49} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
deleted file mode 100644
index 25e48fef8..000000000
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 const Node op_a = GetRegister(instr.gpr8);
21
22 const Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return Immediate(instr.alu.GetSignedImm20_20());
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 // We can't use the constant predicate as destination.
33 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
34
35 const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
36 const Node predicate =
37 GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
38
39 // Set the primary predicate to the result of Predicate OP SecondPredicate
40 const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42 SetPredicate(bb, instr.isetp.pred3, value);
43
44 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
45 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
46 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
47 SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <vector>
8
9#include <fmt/format.h>
10
11#include "common/alignment.h"
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "video_core/engines/shader_bytecode.h"
16#include "video_core/shader/node_helper.h"
17#include "video_core/shader/shader_ir.h"
18
19namespace VideoCommon::Shader {
20
21using std::move;
22using Tegra::Shader::AtomicOp;
23using Tegra::Shader::AtomicType;
24using Tegra::Shader::Attribute;
25using Tegra::Shader::GlobalAtomicType;
26using Tegra::Shader::Instruction;
27using Tegra::Shader::OpCode;
28using Tegra::Shader::Register;
29using Tegra::Shader::StoreType;
30
31namespace {
32
33OperationCode GetAtomOperation(AtomicOp op) {
34 switch (op) {
35 case AtomicOp::Add:
36 return OperationCode::AtomicIAdd;
37 case AtomicOp::Min:
38 return OperationCode::AtomicIMin;
39 case AtomicOp::Max:
40 return OperationCode::AtomicIMax;
41 case AtomicOp::And:
42 return OperationCode::AtomicIAnd;
43 case AtomicOp::Or:
44 return OperationCode::AtomicIOr;
45 case AtomicOp::Xor:
46 return OperationCode::AtomicIXor;
47 case AtomicOp::Exch:
48 return OperationCode::AtomicIExchange;
49 default:
50 UNIMPLEMENTED_MSG("op={}", op);
51 return OperationCode::AtomicIAdd;
52 }
53}
54
55bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
56 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
57 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
58}
59
60u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
61 switch (uniform_type) {
62 case Tegra::Shader::UniformType::UnsignedByte:
63 return 0b11;
64 case Tegra::Shader::UniformType::UnsignedShort:
65 return 0b10;
66 default:
67 UNREACHABLE();
68 return 0;
69 }
70}
71
72u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
73 switch (uniform_type) {
74 case Tegra::Shader::UniformType::UnsignedByte:
75 return 8;
76 case Tegra::Shader::UniformType::UnsignedShort:
77 return 16;
78 case Tegra::Shader::UniformType::Single:
79 return 32;
80 case Tegra::Shader::UniformType::Double:
81 return 64;
82 case Tegra::Shader::UniformType::Quad:
83 case Tegra::Shader::UniformType::UnsignedQuad:
84 return 128;
85 default:
86 UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
87 return 32;
88 }
89}
90
91Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
92 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
93 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
94 return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
95}
96
97Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
98 Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
99 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
100 return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
101 Immediate(size));
102}
103
104Node Sign16Extend(Node value) {
105 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
106 Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
107 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
108 return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
109}
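// For illustration: with an UnsignedByte access whose address has low bits 0b10,
// ExtractUnaligned computes offset = (address & 0b11) << 3 = 16 and pulls an 8-bit field
// starting at bit 16 of the loaded word. Sign16Extend turns 0x8001 (sign bit set) into
// 0xFFFF8001, i.e. -32767 as a signed 32-bit value.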
110
111} // Anonymous namespace
112
113u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
114 const Instruction instr = {program_code[pc]};
115 const auto opcode = OpCode::Decode(instr);
116
117 switch (opcode->get().GetId()) {
118 case OpCode::Id::LD_A: {
119        // Note: Shouldn't this use flat interpolation mode, i.e. no interpolation applied?
120 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
121 "Indirect attribute loads are not supported");
122 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
123 "Unaligned attribute loads are not supported");
124 UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
125 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
126 "Non-32 bits PHYS reads are not implemented");
127
128 const Node buffer{GetRegister(instr.gpr39)};
129
130 u64 next_element = instr.attribute.fmt20.element;
131 auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
132
133 const auto LoadNextElement = [&](u32 reg_offset) {
134 const Node attribute{instr.attribute.fmt20.IsPhysical()
135 ? GetPhysicalInputAttribute(instr.gpr8, buffer)
136 : GetInputAttribute(static_cast<Attribute::Index>(next_index),
137 next_element, buffer)};
138
139 SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
140
141 // Load the next attribute element into the following register. If the element
142 // to load goes beyond the vec4 size, load the first element of the next
143 // attribute.
144 next_element = (next_element + 1) % 4;
145 next_index = next_index + (next_element == 0 ? 1 : 0);
146 };
147
148 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
149 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
150 LoadNextElement(reg_offset);
151 }
152 break;
153 }
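// For example, a three-word load starting at attribute index 3, element 2 writes attr3.z
// and attr3.w, then wraps (next_element back to 0, next_index incremented) and writes attr4.x.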
154 case OpCode::Id::LD_C: {
155 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
156
157 Node index = GetRegister(instr.gpr8);
158
159 const Node op_a =
160 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
161
162 switch (instr.ld_c.type.Value()) {
163 case Tegra::Shader::UniformType::Single:
164 SetRegister(bb, instr.gpr0, op_a);
165 break;
166
167 case Tegra::Shader::UniformType::Double: {
168 const Node op_b =
169 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
170
171 SetTemporary(bb, 0, op_a);
172 SetTemporary(bb, 1, op_b);
173 SetRegister(bb, instr.gpr0, GetTemporary(0));
174 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
175 break;
176 }
177 default:
178 UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
179 }
180 break;
181 }
182 case OpCode::Id::LD_L:
183 LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
184 [[fallthrough]];
185 case OpCode::Id::LD_S: {
186 const auto GetAddress = [&](s32 offset) {
187 ASSERT(offset % 4 == 0);
188 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
189 return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
190 };
191 const auto GetMemory = [&](s32 offset) {
192 return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
193 : GetLocalMemory(GetAddress(offset));
194 };
195
196 switch (instr.ldst_sl.type.Value()) {
197 case StoreType::Signed16:
198 SetRegister(bb, instr.gpr0,
199 Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
200 break;
201 case StoreType::Bits32:
202 case StoreType::Bits64:
203 case StoreType::Bits128: {
204 const u32 count = [&] {
205 switch (instr.ldst_sl.type.Value()) {
206 case StoreType::Bits32:
207 return 1;
208 case StoreType::Bits64:
209 return 2;
210 case StoreType::Bits128:
211 return 4;
212 default:
213 UNREACHABLE();
214 return 0;
215 }
216 }();
217 for (u32 i = 0; i < count; ++i) {
218 SetTemporary(bb, i, GetMemory(i * 4));
219 }
220 for (u32 i = 0; i < count; ++i) {
221 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
222 }
223 break;
224 }
225 default:
226 UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
227 instr.ldst_sl.type.Value());
228 }
229 break;
230 }
231 case OpCode::Id::LD:
232 case OpCode::Id::LDG: {
233 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
234 switch (opcode->get().GetId()) {
235 case OpCode::Id::LD:
236 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
237 return instr.generic.type;
238 case OpCode::Id::LDG:
239 return instr.ldg.type;
240 default:
241 UNREACHABLE();
242 return {};
243 }
244 }();
245
246 const auto [real_address_base, base_address, descriptor] =
247 TrackGlobalMemory(bb, instr, true, false);
248
249 const u32 size = GetMemorySize(type);
250 const u32 count = Common::AlignUp(size, 32) / 32;
251 if (!real_address_base || !base_address) {
252 // Tracking failed, load zeroes.
253 for (u32 i = 0; i < count; ++i) {
254 SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
255 }
256 break;
257 }
258
259 for (u32 i = 0; i < count; ++i) {
260 const Node it_offset = Immediate(i * 4);
261 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
262 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
263
264            // To handle unaligned loads, use the low address bits to locate the bytes
265            // within the loaded u32 and extract them.
266 if (IsUnaligned(type)) {
267 gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
268 }
269
270 SetTemporary(bb, i, gmem);
271 }
272
273 for (u32 i = 0; i < count; ++i) {
274 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
275 }
276 break;
277 }
278 case OpCode::Id::ST_A: {
279 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
280 "Indirect attribute loads are not supported");
281 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
282 "Unaligned attribute loads are not supported");
283
284 u64 element = instr.attribute.fmt20.element;
285 auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
286
287 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
288 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
289 Node dest;
290 if (instr.attribute.fmt20.patch) {
291 const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
292 dest = MakeNode<PatchNode>(offset);
293 } else {
294 dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
295 GetRegister(instr.gpr39));
296 }
297 const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
298
299 bb.push_back(Operation(OperationCode::Assign, dest, src));
300
301            // Advance to the next attribute element for the following register. If the element
302            // goes beyond the vec4 size, wrap to the first element of the next attribute.
303 element = (element + 1) % 4;
304 index = index + (element == 0 ? 1 : 0);
305 }
306 break;
307 }
308 case OpCode::Id::ST_L:
309 LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
310 [[fallthrough]];
311 case OpCode::Id::ST_S: {
312 const auto GetAddress = [&](s32 offset) {
313 ASSERT(offset % 4 == 0);
314 const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
315 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
316 };
317
318 const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
319 const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
320 const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
321
322 switch (instr.ldst_sl.type.Value()) {
323 case StoreType::Bits128:
324 (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
325 (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
326 [[fallthrough]];
327 case StoreType::Bits64:
328 (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
329 [[fallthrough]];
330 case StoreType::Bits32:
331 (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
332 break;
333 case StoreType::Unsigned16:
334 case StoreType::Signed16: {
335 Node address = GetAddress(0);
336 Node memory = (this->*get_memory)(address);
337 (this->*set_memory)(
338 bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
339 break;
340 }
341 default:
342        UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
343 instr.ldst_sl.type.Value());
344 }
345 break;
346 }
347 case OpCode::Id::ST:
348 case OpCode::Id::STG: {
349 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
350 switch (opcode->get().GetId()) {
351 case OpCode::Id::ST:
352 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
353 return instr.generic.type;
354 case OpCode::Id::STG:
355 return instr.stg.type;
356 default:
357 UNREACHABLE();
358 return {};
359 }
360 }();
361
362        // For unaligned stores we have to read memory too.
363 const bool is_read = IsUnaligned(type);
364 const auto [real_address_base, base_address, descriptor] =
365 TrackGlobalMemory(bb, instr, is_read, true);
366 if (!real_address_base || !base_address) {
367 // Tracking failed, skip the store.
368 break;
369 }
370
371 const u32 size = GetMemorySize(type);
372 const u32 count = Common::AlignUp(size, 32) / 32;
373 for (u32 i = 0; i < count; ++i) {
374 const Node it_offset = Immediate(i * 4);
375 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
376 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
377 Node value = GetRegister(instr.gpr0.Value() + i);
378
379 if (IsUnaligned(type)) {
380 const u32 mask = GetUnalignedMask(type);
381 value = InsertUnaligned(gmem, move(value), real_address, mask, size);
382 }
383
384 bb.push_back(Operation(OperationCode::Assign, gmem, value));
385 }
386 break;
387 }
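// A minimal reference sketch, in plain C++, of what InsertUnaligned is assumed to do for the
// unaligned global stores above: only `size` bits of the existing 32-bit word are replaced,
// which is why unaligned stores also read memory (is_read above). Illustration only.
#include <cstdint>

static uint32_t InsertUnalignedRef(uint32_t memory_word, uint32_t value, uint32_t address,
                                   uint32_t byte_mask, uint32_t size_in_bits) {
    const uint32_t bit_offset = (address & byte_mask) * 8;  // byte offset -> bit offset
    const uint32_t value_mask =
        size_in_bits >= 32 ? 0xFFFF'FFFFu : ((1u << size_in_bits) - 1u);
    const uint32_t cleared = memory_word & ~(value_mask << bit_offset);  // clear target bits
    return cleared | ((value & value_mask) << bit_offset);               // splice in the value
}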
388 case OpCode::Id::RED: {
389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
390 instr.red.type.Value());
391 const auto [real_address, base_address, descriptor] =
392 TrackGlobalMemory(bb, instr, true, true);
393 if (!real_address || !base_address) {
394 // Tracking failed, skip atomic.
395 break;
396 }
397 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
398 Node value = GetRegister(instr.gpr0);
399 bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
400 break;
401 }
402 case OpCode::Id::ATOM: {
403 UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
404 instr.atom.operation == AtomicOp::Dec ||
405 instr.atom.operation == AtomicOp::SafeAdd,
406 "operation={}", instr.atom.operation.Value());
407 UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
408 instr.atom.type == GlobalAtomicType::U64 ||
409 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
410 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
411 "type={}", instr.atom.type.Value());
412
413 const auto [real_address, base_address, descriptor] =
414 TrackGlobalMemory(bb, instr, true, true);
415 if (!real_address || !base_address) {
416 // Tracking failed, skip atomic.
417 break;
418 }
419
420 const bool is_signed =
421 instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
422 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
423 SetRegister(bb, instr.gpr0,
424 SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
425 GetRegister(instr.gpr20)));
426 break;
427 }
428 case OpCode::Id::ATOMS: {
429 UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
430 instr.atoms.operation == AtomicOp::Dec,
431 "operation={}", instr.atoms.operation.Value());
432 UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
433 instr.atoms.type == AtomicType::U64,
434 "type={}", instr.atoms.type.Value());
435 const bool is_signed =
436 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
437 const s32 offset = instr.atoms.GetImmediateOffset();
438 Node address = GetRegister(instr.gpr8);
439 address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
440 SetRegister(bb, instr.gpr0,
441 SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
442 GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
443 break;
444 }
445 case OpCode::Id::AL2P: {
446 // Ignore al2p.direction since we don't care about it.
447
448        // Calculate a fake physical address for emulation.
449 const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
450 const Node reg{GetRegister(instr.gpr8)};
451 const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
452
453 // Set the fake address to target register.
454 SetRegister(bb, instr.gpr0, fake_address);
455
456 // Signal the shader IR to declare all possible attributes and varyings
457 uses_physical_attributes = true;
458 break;
459 }
460 default:
461 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
462 }
463
464 return pc;
465}
466
467std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
468 Instruction instr,
469 bool is_read, bool is_write) {
470 const auto addr_register{GetRegister(instr.gmem.gpr)};
471 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
472
473 const auto [base_address, index, offset] =
474 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
475 ASSERT_OR_EXECUTE_MSG(
476 base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
477 "Global memory tracking failed");
478
479 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
480
481 const GlobalMemoryBase descriptor{index, offset};
482 const auto& entry = used_global_memory.try_emplace(descriptor).first;
483 auto& usage = entry->second;
484 usage.is_written |= is_write;
485 usage.is_read |= is_read;
486
487 const auto real_address =
488 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
489
490 return {real_address, base_address, descriptor};
491}
492
493} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
deleted file mode 100644
index 5f88537bc..000000000
--- a/src/video_core/shader/decode/other.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using std::move;
15using Tegra::Shader::ConditionCode;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::IpaInterpMode;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PixelImap;
20using Tegra::Shader::Register;
21using Tegra::Shader::SystemVariable;
22
23using Index = Tegra::Shader::Attribute::Index;
24
25u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
26 const Instruction instr = {program_code[pc]};
27 const auto opcode = OpCode::Decode(instr);
28
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::NOP: {
31 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
32 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
33 // With the previous preconditions, this instruction is a no-operation.
34 break;
35 }
36 case OpCode::Id::EXIT: {
37 const ConditionCode cc = instr.flow_condition_code;
38 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
39
40 switch (instr.flow.cond) {
41 case Tegra::Shader::FlowCondition::Always:
42 bb.push_back(Operation(OperationCode::Exit));
43 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
44 // If this is an unconditional exit then just end processing here,
45 // otherwise we have to account for the possibility of the condition
46 // not being met, so continue processing the next instruction.
47 pc = MAX_PROGRAM_LENGTH - 1;
48 }
49 break;
50
51 case Tegra::Shader::FlowCondition::Fcsm_Tr:
52            // TODO(bunnei): What is this used for? If we assume this condition is not
53            // satisfied, dual vertex shaders in Farming Simulator make more sense
54 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
55 break;
56
57 default:
58 UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
59 }
60 break;
61 }
62 case OpCode::Id::KIL: {
63 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
64
65 const ConditionCode cc = instr.flow_condition_code;
66 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
67
68 bb.push_back(Operation(OperationCode::Discard));
69 break;
70 }
71 case OpCode::Id::S2R: {
72 const Node value = [this, instr] {
73 switch (instr.sys20) {
74 case SystemVariable::LaneId:
75 return Operation(OperationCode::ThreadId);
76 case SystemVariable::InvocationId:
77 return Operation(OperationCode::InvocationId);
78 case SystemVariable::Ydirection:
79 uses_y_negate = true;
80 return Operation(OperationCode::YNegate);
81 case SystemVariable::InvocationInfo:
82 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
83 return Immediate(0x00ff'0000U);
84 case SystemVariable::WscaleFactorXY:
85 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorZ:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
89 return Immediate(0U);
90 case SystemVariable::Tid: {
91 Node val = Immediate(0);
92 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
93 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
94 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
95 return val;
96 }
97 case SystemVariable::TidX:
98 return Operation(OperationCode::LocalInvocationIdX);
99 case SystemVariable::TidY:
100 return Operation(OperationCode::LocalInvocationIdY);
101 case SystemVariable::TidZ:
102 return Operation(OperationCode::LocalInvocationIdZ);
103 case SystemVariable::CtaIdX:
104 return Operation(OperationCode::WorkGroupIdX);
105 case SystemVariable::CtaIdY:
106 return Operation(OperationCode::WorkGroupIdY);
107 case SystemVariable::CtaIdZ:
108 return Operation(OperationCode::WorkGroupIdZ);
109 case SystemVariable::EqMask:
110 case SystemVariable::LtMask:
111 case SystemVariable::LeMask:
112 case SystemVariable::GtMask:
113 case SystemVariable::GeMask:
114 uses_warps = true;
115 switch (instr.sys20) {
116 case SystemVariable::EqMask:
117 return Operation(OperationCode::ThreadEqMask);
118 case SystemVariable::LtMask:
119 return Operation(OperationCode::ThreadLtMask);
120 case SystemVariable::LeMask:
121 return Operation(OperationCode::ThreadLeMask);
122 case SystemVariable::GtMask:
123 return Operation(OperationCode::ThreadGtMask);
124 case SystemVariable::GeMask:
125 return Operation(OperationCode::ThreadGeMask);
126 default:
127 UNREACHABLE();
128 return Immediate(0u);
129 }
130 default:
131 UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
132 return Immediate(0u);
133 }
134 }();
135 SetRegister(bb, instr.gpr0, value);
136
137 break;
138 }
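// A minimal sketch of the packed TID register layout implied by the BitfieldInsert calls in the
// SystemVariable::Tid case above (plain C++, illustration only):
#include <cstdint>

static uint32_t PackTidRef(uint32_t tid_x, uint32_t tid_y, uint32_t tid_z) {
    uint32_t value = 0;
    value |= (tid_x & 0x1FFu) << 0;   // bits [8:0]   local invocation id X (9 bits)
    value |= (tid_y & 0x1FFu) << 16;  // bits [24:16] local invocation id Y (9 bits)
    value |= (tid_z & 0x1Fu) << 26;   // bits [30:26] local invocation id Z (5 bits)
    return value;
}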
139 case OpCode::Id::BRA: {
140 Node branch;
141 if (instr.bra.constant_buffer == 0) {
142 const u32 target = pc + instr.bra.GetBranchTarget();
143 branch = Operation(OperationCode::Branch, Immediate(target));
144 } else {
145 const u32 target = pc + 1;
146 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
147 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
148 PRECISE, op_a, Immediate(3));
149 const Node operand =
150 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
151 branch = Operation(OperationCode::BranchIndirect, operand);
152 }
153
154 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
155 if (cc != Tegra::Shader::ConditionCode::T) {
156 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
157 } else {
158 bb.push_back(branch);
159 }
160 break;
161 }
162 case OpCode::Id::BRX: {
163 Node operand;
164 if (instr.brx.constant_buffer != 0) {
165 const s32 target = pc + 1;
166 const Node index = GetRegister(instr.gpr8);
167 const Node op_a =
168 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
169 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
170 PRECISE, op_a, Immediate(3));
171 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
172 } else {
173 const s32 target = pc + instr.brx.GetBranchExtend();
174 const Node op_a = GetRegister(instr.gpr8);
175 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
176 PRECISE, op_a, Immediate(3));
177 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
178 }
179 const Node branch = Operation(OperationCode::BranchIndirect, operand);
180
181 const ConditionCode cc = instr.flow_condition_code;
182 if (cc != ConditionCode::T) {
183 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
184 } else {
185 bb.push_back(branch);
186 }
187 break;
188 }
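// A minimal sketch, assuming the encoding implied above, of how the indirect branch operand for
// BRA (constant buffer) and BRX is formed: the fetched value is a byte offset into a stream of
// 8-byte instructions, so it is shifted right by 3 and added to the pc-relative base target.
#include <cstdint>

static int32_t IndirectBranchOperandRef(int32_t fetched_value, int32_t base_target) {
    return (fetched_value >> 3) + base_target;  // instruction index + pc-relative base
}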
189 case OpCode::Id::SSY: {
190 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
191 "Constant buffer flow is not supported");
192
193 if (disable_flow_stack) {
194 break;
195 }
196
197 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
198 const u32 target = pc + instr.bra.GetBranchTarget();
199 bb.push_back(
200 Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
201 break;
202 }
203 case OpCode::Id::PBK: {
204 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
205 "Constant buffer PBK is not supported");
206
207 if (disable_flow_stack) {
208 break;
209 }
210
211 // PBK pushes to a stack the address where BRK will jump to.
212 const u32 target = pc + instr.bra.GetBranchTarget();
213 bb.push_back(
214 Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
215 break;
216 }
217 case OpCode::Id::SYNC: {
218 const ConditionCode cc = instr.flow_condition_code;
219 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
220
221 if (decompiled) {
222 break;
223 }
224
225 // The SYNC opcode jumps to the address previously set by the SSY opcode
226 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
227 break;
228 }
229 case OpCode::Id::BRK: {
230 const ConditionCode cc = instr.flow_condition_code;
231 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
232 if (decompiled) {
233 break;
234 }
235
236 // The BRK opcode jumps to the address previously set by the PBK opcode
237 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
238 break;
239 }
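// A minimal model, in plain C++, of the two flow stacks these opcodes assume: SSY and PBK push
// re-convergence/break targets, SYNC and BRK pop them. Sketch only, hypothetical type.
#include <cstdint>
#include <stack>

struct FlowStacksModel {
    std::stack<uint32_t> ssy;  // pushed by SSY, popped by SYNC
    std::stack<uint32_t> pbk;  // pushed by PBK, popped by BRK

    void Push(bool to_ssy, uint32_t target) {
        (to_ssy ? ssy : pbk).push(target);
    }

    uint32_t Pop(bool from_ssy) {
        auto& stack = from_ssy ? ssy : pbk;
        const uint32_t target = stack.top();
        stack.pop();
        return target;  // pc that execution jumps to
    }
};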
240 case OpCode::Id::IPA: {
241 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
242 const auto attribute = instr.attribute.fmt28;
243 const Index index = attribute.index;
244
245 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
246 : GetInputAttribute(index, attribute.element);
247
248 // Code taken from Ryujinx.
249 if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
250 const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
251 if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
252 Node position_w = GetInputAttribute(Index::Position, 3);
253 value = Operation(OperationCode::FMul, move(value), move(position_w));
254 }
255 }
256
257 if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
258 value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
259 }
260
261 value = GetSaturatedFloat(move(value), instr.ipa.saturate);
262
263 SetRegister(bb, instr.gpr0, move(value));
264 break;
265 }
266 case OpCode::Id::OUT_R: {
267 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
268 "Stream buffer is not supported");
269
270 if (instr.out.emit) {
271 // gpr0 is used to store the next address and gpr8 contains the address to emit.
272            // Hardware uses pointers here, but we just ignore them.
273 bb.push_back(Operation(OperationCode::EmitVertex));
274 SetRegister(bb, instr.gpr0, Immediate(0));
275 }
276 if (instr.out.cut) {
277 bb.push_back(Operation(OperationCode::EndPrimitive));
278 }
279 break;
280 }
281 case OpCode::Id::ISBERD: {
282 UNIMPLEMENTED_IF(instr.isberd.o != 0);
283 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
284 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
285 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
286 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
287 SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
288 break;
289 }
290 case OpCode::Id::BAR: {
291 UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
292 bb.push_back(Operation(OperationCode::Barrier));
293 break;
294 }
295 case OpCode::Id::MEMBAR: {
296 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
297 const OperationCode type = [instr] {
298 switch (instr.membar.type) {
299 case Tegra::Shader::MembarType::CTA:
300 return OperationCode::MemoryBarrierGroup;
301 case Tegra::Shader::MembarType::GL:
302 return OperationCode::MemoryBarrierGlobal;
303 default:
304 UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
305 return OperationCode::MemoryBarrierGlobal;
306 }
307 }();
308 bb.push_back(Operation(type));
309 break;
310 }
311 case OpCode::Id::DEPBAR: {
312 LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
313 break;
314 }
315 default:
316 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
317 }
318
319 return pc;
320}
321
322} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
deleted file mode 100644
index 9290d22eb..000000000
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::PSETP: {
23 const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
24 const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
25
26 // We can't use the constant predicate as destination.
27 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
28
29 const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
30
31 const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
32 const Node predicate = Operation(combiner, op_a, op_b);
33
34 // Set the primary predicate to the result of Predicate OP SecondPredicate
35 SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
36
37 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
38 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
39 // enabled
40 SetPredicate(bb, instr.psetp.pred0,
41 Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
42 second_pred));
43 }
44 break;
45 }
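// A minimal sketch of the predicate writes performed by PSETP above, with `combine` standing in
// for the and/or/xor operation selected by instr.psetp.op (plain C++, illustration only):
#include <functional>

static void PsetpRef(bool op_a, bool op_b, bool second_pred,
                     const std::function<bool(bool, bool)>& combine, bool& pred3, bool& pred0) {
    const bool predicate = combine(op_a, op_b);
    pred3 = combine(predicate, second_pred);   // primary destination predicate
    pred0 = combine(!predicate, second_pred);  // secondary destination, when not UnusedIndex
}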
46 case OpCode::Id::CSETP: {
47 const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
48 const Node condition_code = GetConditionCode(instr.csetp.cc);
49
50 const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
51
52 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
53 SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
54 }
55 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
56 const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
57 SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
58 }
59 break;
60 }
61 default:
62 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
63 }
64
65 return pc;
66}
67
68} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
deleted file mode 100644
index 84dbc50fe..000000000
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
20 "Condition codes generation in PSET is not implemented");
21
22 const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
23 const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
24 const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
25
26 const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
27
28 const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
29 const Node predicate = Operation(combiner, first_pred, second_pred);
30
31 const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
33 const Node value =
34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
35
36 if (instr.pset.bf) {
37 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
38 } else {
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 }
41 SetRegister(bb, instr.gpr0, value);
42
43 return pc;
44}
45
46} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
deleted file mode 100644
index 6116c31aa..000000000
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using std::move;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18
19namespace {
20constexpr u64 NUM_CONDITION_CODES = 4;
21constexpr u64 NUM_PREDICATES = 7;
22} // namespace
23
24u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
25 const Instruction instr = {program_code[pc]};
26 const auto opcode = OpCode::Decode(instr);
27
28 Node apply_mask = [this, opcode, instr] {
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::R2P_IMM:
31 case OpCode::Id::P2R_IMM:
32 return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
33 default:
34 UNREACHABLE();
35 return Immediate(0);
36 }
37 }();
38
39 const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
40
41 const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
42 const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
43 const auto get_entry = [this, cc](u64 entry) {
44 return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
45 };
46
47 switch (opcode->get().GetId()) {
48 case OpCode::Id::R2P_IMM: {
49 Node mask = GetRegister(instr.gpr8);
50
51 for (u64 entry = 0; entry < num_entries; ++entry) {
52 const u32 shift = static_cast<u32>(entry);
53
54 Node apply = BitfieldExtract(apply_mask, shift, 1);
55 Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
56
57 Node compare = BitfieldExtract(mask, offset + shift, 1);
58 Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
59
60 Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
61 bb.push_back(Conditional(condition, {move(code)}));
62 }
63 break;
64 }
65 case OpCode::Id::P2R_IMM: {
66 Node value = Immediate(0);
67 for (u64 entry = 0; entry < num_entries; ++entry) {
68 Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
69 Immediate(0));
70 value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
71 }
72 value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
73 value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
74
75 SetRegister(bb, instr.gpr0, move(value));
76 break;
77 }
78 default:
79        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
80 break;
81 }
82
83 return pc;
84}
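// A minimal sketch, in plain C++, of the bit packing these opcodes are assumed to perform: P2R
// gathers predicate/condition-code bits into one byte of a register, R2P scatters one byte of a
// register back into the predicates, gated per bit by the immediate mask. Illustration only,
// hypothetical helpers.
#include <cstdint>

// Packs the entries, masks them, and inserts them into one byte of gpr8 at bit_offset.
static uint32_t P2RRef(const bool entries[8], uint64_t num_entries, uint32_t apply_mask,
                       uint32_t gpr8, uint32_t bit_offset) {
    uint32_t packed = 0;
    for (uint64_t i = 0; i < num_entries; ++i) {
        packed |= (entries[i] ? 1u : 0u) << i;  // mirrors Select(entry, 1 << entry, 0)
    }
    packed &= apply_mask;                                    // UBitwiseAnd with the mask
    const uint32_t cleared = gpr8 & ~(0xFFu << bit_offset);  // BitfieldInsert, 8 bits wide
    return cleared | ((packed & 0xFFu) << bit_offset);
}

// Scatters one byte of gpr8 back into the entries; a bit is only written when its mask bit is set.
static void R2PRef(bool entries[8], uint64_t num_entries, uint32_t apply_mask, uint32_t gpr8,
                   uint32_t bit_offset) {
    for (uint64_t i = 0; i < num_entries; ++i) {
        if (((apply_mask >> i) & 1u) != 0) {
            entries[i] = ((gpr8 >> (bit_offset + i)) & 1u) != 0;
        }
    }
}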
85
86} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
deleted file mode 100644
index a53819c15..000000000
--- a/src/video_core/shader/decode/shift.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::ShfType;
17using Tegra::Shader::ShfXmode;
18
19namespace {
20
21Node IsFull(Node shift) {
22 return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
23}
24
25Node Shift(OperationCode opcode, Node value, Node shift) {
26 Node shifted = Operation(opcode, move(value), shift);
27 return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
28}
29
30Node ClampShift(Node shift, s32 size = 32) {
31 shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
32 return Operation(OperationCode::IMin, move(shift), Immediate(size));
33}
34
35Node WrapShift(Node shift, s32 size = 32) {
36 return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
37}
38
39Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
40 // These values are used when the shift value is less than 32
41 Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
42 Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
43 Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
44
45 if (type == ShfType::Bits32) {
46 // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
47 return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
48 }
49
50    // And these when it is 32 or larger
51 const bool is_signed = type == ShfType::S64;
52 const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
53 Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
54 Node greater = Shift(opcode, high, move(reduced));
55
56 Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
57 Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
58
59 Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
60 return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
61}
62
63Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
64 // These values are used when the shift value is less than 32
65 Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
66 Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
67 Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
68
69 if (type == ShfType::Bits32) {
70 // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
71 return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
72 }
73
74    // And these when it is 32 or larger
75 Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
76 Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
77
78 Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
79 Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
80
81 Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
82 return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
83}
84
85} // Anonymous namespace
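// A minimal reference, in plain C++, for the 64-bit funnel shifts built above out of 32-bit
// operations: the {high:low} pair is shifted and the relevant 32-bit half is returned. Sketch
// only; the explicit special cases for shift == 0 and shift >= 32 mirror the helpers above.
#include <cstdint>

static uint32_t FunnelShiftRightRef(uint32_t low, uint32_t high, uint32_t shift) {
    const uint64_t pair = (static_cast<uint64_t>(high) << 32) | low;
    return static_cast<uint32_t>(pair >> (shift & 63));  // low 32 bits of the shifted pair
}

static uint32_t FunnelShiftLeftRef(uint32_t low, uint32_t high, uint32_t shift) {
    const uint64_t pair = (static_cast<uint64_t>(high) << 32) | low;
    return static_cast<uint32_t>((pair << (shift & 63)) >> 32);  // high 32 bits of the result
}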
86
87u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
88 const Instruction instr = {program_code[pc]};
89 const auto opcode = OpCode::Decode(instr);
90
91 Node op_a = GetRegister(instr.gpr8);
92 Node op_b = [this, instr] {
93 if (instr.is_b_imm) {
94 return Immediate(instr.alu.GetSignedImm20_20());
95 } else if (instr.is_b_gpr) {
96 return GetRegister(instr.gpr20);
97 } else {
98 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
99 }
100 }();
101
102 switch (const auto opid = opcode->get().GetId(); opid) {
103 case OpCode::Id::SHR_C:
104 case OpCode::Id::SHR_R:
105 case OpCode::Id::SHR_IMM: {
106 op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
107
108 Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
109 move(op_a), move(op_b));
110 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
111 SetRegister(bb, instr.gpr0, move(value));
112 break;
113 }
114 case OpCode::Id::SHL_C:
115 case OpCode::Id::SHL_R:
116 case OpCode::Id::SHL_IMM: {
117 Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
118 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
119 SetRegister(bb, instr.gpr0, move(value));
120 break;
121 }
122 case OpCode::Id::SHF_RIGHT_R:
123 case OpCode::Id::SHF_RIGHT_IMM:
124 case OpCode::Id::SHF_LEFT_R:
125 case OpCode::Id::SHF_LEFT_IMM: {
126 UNIMPLEMENTED_IF(instr.generates_cc);
127 UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
128 instr.shf.xmode.Value());
129
130 if (instr.is_b_imm) {
131 op_b = Immediate(static_cast<u32>(instr.shf.immediate));
132 }
133 const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
134 Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
135
136 Node negated_shift = Operation(OperationCode::INegate, shift);
137 Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
138
139 const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
140 Node value = (is_right ? ShiftRight : ShiftLeft)(
141 move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
142
143 SetRegister(bb, instr.gpr0, move(value));
144 break;
145 }
146 default:
147 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
148 }
149
150 return pc;
151}
152
153} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
deleted file mode 100644
index c69681e8d..000000000
--- a/src/video_core/shader/decode/texture.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::Register;
23using Tegra::Shader::TextureMiscMode;
24using Tegra::Shader::TextureProcessMode;
25using Tegra::Shader::TextureType;
26
27static std::size_t GetCoordCount(TextureType texture_type) {
28 switch (texture_type) {
29 case TextureType::Texture1D:
30 return 1;
31 case TextureType::Texture2D:
32 return 2;
33 case TextureType::Texture3D:
34 case TextureType::TextureCube:
35 return 3;
36 default:
37 UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
38 return 0;
39 }
40}
41
42u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
43 const Instruction instr = {program_code[pc]};
44 const auto opcode = OpCode::Decode(instr);
45 bool is_bindless = false;
46 switch (opcode->get().GetId()) {
47 case OpCode::Id::TEX: {
48 const TextureType texture_type{instr.tex.texture_type};
49 const bool is_array = instr.tex.array != 0;
50 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
51 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
52 const auto process_mode = instr.tex.GetTextureProcessMode();
53 WriteTexInstructionFloat(
54 bb, instr,
55 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
56 break;
57 }
58 case OpCode::Id::TEX_B: {
59 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
60 "AOFFI is not implemented");
61
62 const TextureType texture_type{instr.tex_b.texture_type};
63 const bool is_array = instr.tex_b.array != 0;
64 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
65 const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
66 const auto process_mode = instr.tex_b.GetTextureProcessMode();
67 WriteTexInstructionFloat(bb, instr,
68 GetTexCode(instr, texture_type, process_mode, depth_compare,
69 is_array, is_aoffi, {instr.gpr20}));
70 break;
71 }
72 case OpCode::Id::TEXS: {
73 const TextureType texture_type{instr.texs.GetTextureType()};
74 const bool is_array{instr.texs.IsArrayTexture()};
75 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
76 const auto process_mode = instr.texs.GetTextureProcessMode();
77
78 const Node4 components =
79 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
80
81 if (instr.texs.fp32_flag) {
82 WriteTexsInstructionFloat(bb, instr, components);
83 } else {
84 WriteTexsInstructionHalfFloat(bb, instr, components);
85 }
86 break;
87 }
88 case OpCode::Id::TLD4_B: {
89 is_bindless = true;
90 [[fallthrough]];
91 }
92 case OpCode::Id::TLD4: {
93 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
94 "NDV is not implemented");
95 const auto texture_type = instr.tld4.texture_type.Value();
96 const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
97 : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
98 const bool is_array = instr.tld4.array != 0;
99 const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
100 : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
101 const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
102 : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
103 WriteTexInstructionFloat(bb, instr,
104 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
105 is_ptp, is_bindless));
106 break;
107 }
108 case OpCode::Id::TLD4S: {
109 constexpr std::size_t num_coords = 2;
110 const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
111 const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
112 const Node op_a = GetRegister(instr.gpr8);
113 const Node op_b = GetRegister(instr.gpr20);
114
115 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
116 std::vector<Node> coords;
117 std::vector<Node> aoffi;
118 Node depth_compare;
119 if (is_depth_compare) {
120 // Note: TLD4S coordinate encoding works just like TEXS's
121 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
122 coords.push_back(op_a);
123 coords.push_back(op_y);
124 if (is_aoffi) {
125 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
126 depth_compare = GetRegister(instr.gpr20.Value() + 1);
127 } else {
128 depth_compare = op_b;
129 }
130 } else {
131 // There's no depth compare
132 coords.push_back(op_a);
133 if (is_aoffi) {
134 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
135 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
136 } else {
137 coords.push_back(op_b);
138 }
139 }
140 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
141
142 SamplerInfo info;
143 info.is_shadow = is_depth_compare;
144 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
145
146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) {
148 MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
149 {}, {}, component, element, {}};
150 values[element] = Operation(OperationCode::TextureGather, meta, coords);
151 }
152
153 if (instr.tld4s.fp16_flag) {
154 WriteTexsInstructionHalfFloat(bb, instr, values, true);
155 } else {
156 WriteTexsInstructionFloat(bb, instr, values, true);
157 }
158 break;
159 }
160 case OpCode::Id::TXD_B:
161 is_bindless = true;
162 [[fallthrough]];
163 case OpCode::Id::TXD: {
164 UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
165 "AOFFI is not implemented");
166
167 const bool is_array = instr.txd.is_array != 0;
168 const auto derivate_reg = instr.gpr20.Value();
169 const auto texture_type = instr.txd.texture_type.Value();
170 const auto coord_count = GetCoordCount(texture_type);
171 u64 base_reg = instr.gpr8.Value();
172 Node index_var;
173 SamplerInfo info;
174 info.type = texture_type;
175 info.is_array = is_array;
176 const std::optional<SamplerEntry> sampler =
177 is_bindless ? GetBindlessSampler(base_reg, info, index_var)
178 : GetSampler(instr.sampler, info);
179 Node4 values;
180 if (!sampler) {
181 std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
182 WriteTexInstructionFloat(bb, instr, values);
183 break;
184 }
185
186 if (is_bindless) {
187 base_reg++;
188 }
189
190 std::vector<Node> coords;
191 std::vector<Node> derivates;
192 for (std::size_t i = 0; i < coord_count; ++i) {
193 coords.push_back(GetRegister(base_reg + i));
194 const std::size_t derivate = i * 2;
195 derivates.push_back(GetRegister(derivate_reg + derivate));
196 derivates.push_back(GetRegister(derivate_reg + derivate + 1));
197 }
198
199 Node array_node = {};
200 if (is_array) {
201 const Node info_reg = GetRegister(base_reg + coord_count);
202 array_node = BitfieldExtract(info_reg, 0, 16);
203 }
204
205 for (u32 element = 0; element < values.size(); ++element) {
206 MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
207 {}, {}, {}, element, index_var};
208 values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
209 }
210
211 WriteTexInstructionFloat(bb, instr, values);
212
213 break;
214 }
215 case OpCode::Id::TXQ_B:
216 is_bindless = true;
217 [[fallthrough]];
218 case OpCode::Id::TXQ: {
219 Node index_var;
220 const std::optional<SamplerEntry> sampler =
221 is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
222 : GetSampler(instr.sampler, {});
223
224 if (!sampler) {
225 u32 indexer = 0;
226 for (u32 element = 0; element < 4; ++element) {
227 if (!instr.txq.IsComponentEnabled(element)) {
228 continue;
229 }
230 const Node value = Immediate(0);
231 SetTemporary(bb, indexer++, value);
232 }
233 for (u32 i = 0; i < indexer; ++i) {
234 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
235 }
236 break;
237 }
238
239 u32 indexer = 0;
240 switch (instr.txq.query_type) {
241 case Tegra::Shader::TextureQueryType::Dimension: {
242 for (u32 element = 0; element < 4; ++element) {
243 if (!instr.txq.IsComponentEnabled(element)) {
244 continue;
245 }
246 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
247 const Node value =
248 Operation(OperationCode::TextureQueryDimensions, meta,
249 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
250 SetTemporary(bb, indexer++, value);
251 }
252 for (u32 i = 0; i < indexer; ++i) {
253 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
254 }
255 break;
256 }
257 default:
258 UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
259 }
260 break;
261 }
262 case OpCode::Id::TMML_B:
263 is_bindless = true;
264 [[fallthrough]];
265 case OpCode::Id::TMML: {
266 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
267 "NDV is not implemented");
268
269 const auto texture_type = instr.tmml.texture_type.Value();
270 const bool is_array = instr.tmml.array != 0;
271 SamplerInfo info;
272 info.type = texture_type;
273 info.is_array = is_array;
274 Node index_var;
275 const std::optional<SamplerEntry> sampler =
276 is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
277 : GetSampler(instr.sampler, info);
278
279 if (!sampler) {
280 u32 indexer = 0;
281 for (u32 element = 0; element < 2; ++element) {
282 if (!instr.tmml.IsComponentEnabled(element)) {
283 continue;
284 }
285 const Node value = Immediate(0);
286 SetTemporary(bb, indexer++, value);
287 }
288 for (u32 i = 0; i < indexer; ++i) {
289 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
290 }
291 break;
292 }
293
294 const u64 base_index = is_array ? 1 : 0;
295 const u64 num_components = [texture_type] {
296 switch (texture_type) {
297 case TextureType::Texture1D:
298 return 1;
299 case TextureType::Texture2D:
300 return 2;
301 case TextureType::TextureCube:
302 return 3;
303 default:
304 UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
305 return 2;
306 }
307 }();
308 // TODO: What's the array component used for?
309
310 std::vector<Node> coords;
311 coords.reserve(num_components);
312 for (u64 component = 0; component < num_components; ++component) {
313 coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
314 }
315
316 u32 indexer = 0;
317 for (u32 element = 0; element < 2; ++element) {
318 if (!instr.tmml.IsComponentEnabled(element)) {
319 continue;
320 }
321 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
322 Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
323 SetTemporary(bb, indexer++, std::move(value));
324 }
325 for (u32 i = 0; i < indexer; ++i) {
326 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
327 }
328 break;
329 }
330 case OpCode::Id::TLD: {
331 UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
332 UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
333 UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
334
335 WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
336 break;
337 }
338 case OpCode::Id::TLDS: {
339 const TextureType texture_type{instr.tlds.GetTextureType()};
340 const bool is_array{instr.tlds.IsArrayTexture()};
341
342 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
343 "AOFFI is not implemented");
344 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
345
346 const Node4 components = GetTldsCode(instr, texture_type, is_array);
347
348 if (instr.tlds.fp32_flag) {
349 WriteTexsInstructionFloat(bb, instr, components);
350 } else {
351 WriteTexsInstructionHalfFloat(bb, instr, components);
352 }
353 break;
354 }
355 default:
356        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
357 }
358
359 return pc;
360}
361
362ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
363 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
364 if (info.IsComplete()) {
365 return info;
366 }
367 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
370 info.is_array = info.is_array.value_or(false);
371 info.is_shadow = info.is_shadow.value_or(false);
372 info.is_buffer = info.is_buffer.value_or(false);
373 return info;
374 }
375 info.type = info.type.value_or(sampler->texture_type);
376 info.is_array = info.is_array.value_or(sampler->is_array != 0);
377 info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
378 info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
379 return info;
380}
381
382std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) {
384 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386
387 // If this sampler has already been used, return the existing mapping.
388 const auto it =
389 std::find_if(used_samplers.begin(), used_samplers.end(),
390 [offset](const SamplerEntry& entry) { return entry.offset == offset; });
391 if (it != used_samplers.end()) {
392 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
393 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
394 return *it;
395 }
396
397 // Otherwise create a new mapping for this sampler
398 const auto next_index = static_cast<u32>(used_samplers.size());
399 return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
400 *info.is_shadow, *info.is_buffer, false);
401}
402
403std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
404 SamplerInfo info, Node& index_var) {
405 const Node sampler_register = GetRegister(reg);
406 const auto [base_node, tracked_sampler_info] =
407 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
408 if (!base_node) {
409 UNREACHABLE();
410 return std::nullopt;
411 }
412
413 if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
414 const u32 buffer = sampler_info->index;
415 const u32 offset = sampler_info->offset;
416 info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
417
418 // If this sampler has already been used, return the existing mapping.
419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
420 [buffer, offset](const SamplerEntry& entry) {
421 return entry.buffer == buffer && entry.offset == offset;
422 });
423 if (it != used_samplers.end()) {
424 ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
425 it->is_shadow == info.is_shadow);
426 return *it;
427 }
428
429 // Otherwise create a new mapping for this sampler
430 const auto next_index = static_cast<u32>(used_samplers.size());
431 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
432 *info.is_shadow, *info.is_buffer, false);
433 }
434 if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
435 const std::pair indices = sampler_info->indices;
436 const std::pair offsets = sampler_info->offsets;
437 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
438
439 // Try to use an already created sampler if it exists
440 const auto it =
441 std::find_if(used_samplers.begin(), used_samplers.end(),
442 [indices, offsets](const SamplerEntry& entry) {
443 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
444 indices == std::pair{entry.buffer, entry.secondary_buffer};
445 });
446 if (it != used_samplers.end()) {
447 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
448 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
449 return *it;
450 }
451
452 // Otherwise create a new mapping for this sampler
453 const u32 next_index = static_cast<u32>(used_samplers.size());
454 return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
455 *info.is_shadow, *info.is_buffer);
456 }
457 if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
458 const u32 base_offset = sampler_info->base_offset / 4;
459 index_var = GetCustomVariable(sampler_info->bindless_var);
460 info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
461
462 // If this sampler has already been used, return the existing mapping.
463 const auto it = std::find_if(
464 used_samplers.begin(), used_samplers.end(),
465 [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
466 if (it != used_samplers.end()) {
467 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
468 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
469 it->is_indexed);
470 return *it;
471 }
472
473 uses_indexed_samplers = true;
474 // Otherwise create a new mapping for this sampler
475 const auto next_index = static_cast<u32>(used_samplers.size());
476 return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
477 *info.is_shadow, *info.is_buffer, true);
478 }
479 return std::nullopt;
480}
481
482void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
483 u32 dest_elem = 0;
484 for (u32 elem = 0; elem < 4; ++elem) {
485 if (!instr.tex.IsComponentEnabled(elem)) {
486 // Skip disabled components
487 continue;
488 }
489 SetTemporary(bb, dest_elem++, components[elem]);
490 }
491    // After writing the values to temporaries, move them to the real registers
492 for (u32 i = 0; i < dest_elem; ++i) {
493 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
494 }
495}
496
497void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
498 bool ignore_mask) {
499 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
500 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
501
502 u32 dest_elem = 0;
503 for (u32 component = 0; component < 4; ++component) {
504 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
505 continue;
506 SetTemporary(bb, dest_elem++, components[component]);
507 }
508
509 for (u32 i = 0; i < dest_elem; ++i) {
510 if (i < 2) {
511 // Write the first two swizzle components to gpr0 and gpr0+1
512 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
513 } else {
514 ASSERT(instr.texs.HasTwoDestinations());
515 // Write the rest of the swizzle components to gpr28 and gpr28+1
516 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
517 }
518 }
519}
520
521void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
522 const Node4& components, bool ignore_mask) {
523    // TEXS.F16 destination registers are packed in pairs into two registers (just like any half
524    // float instruction).
525
526 Node4 values;
527 u32 dest_elem = 0;
528 for (u32 component = 0; component < 4; ++component) {
529 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
530 continue;
531 values[dest_elem++] = components[component];
532 }
533 if (dest_elem == 0)
534 return;
535
536 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
537
538 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
539 if (dest_elem <= 2) {
540 SetRegister(bb, instr.gpr0, first_value);
541 return;
542 }
543
544 SetTemporary(bb, 0, first_value);
545 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
546
547 SetRegister(bb, instr.gpr0, GetTemporary(0));
548 SetRegister(bb, instr.gpr28, GetTemporary(1));
549}
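// A minimal sketch of the HPack2 packing used above: two 16-bit half floats are packed into one
// 32-bit register, with the first operand assumed to land in the low half. Plain C++, illustration
// only; the float-to-half conversion itself is elided and assumed to happen elsewhere.
#include <cstdint>

static uint32_t HPack2Ref(uint16_t half_low, uint16_t half_high) {
    return static_cast<uint32_t>(half_low) | (static_cast<uint32_t>(half_high) << 16);
}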
550
551Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
552 TextureProcessMode process_mode, std::vector<Node> coords,
553 Node array, Node depth_compare, u32 bias_offset,
554 std::vector<Node> aoffi,
555 std::optional<Tegra::Shader::Register> bindless_reg) {
556 const bool is_array = array != nullptr;
557 const bool is_shadow = depth_compare != nullptr;
558 const bool is_bindless = bindless_reg.has_value();
559
560 ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
561 "Illegal texture type");
562
563 SamplerInfo info;
564 info.type = texture_type;
565 info.is_array = is_array;
566 info.is_shadow = is_shadow;
567 info.is_buffer = false;
568
569 Node index_var;
570 const std::optional<SamplerEntry> sampler =
571 is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
572 : GetSampler(instr.sampler, info);
573 if (!sampler) {
574 return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
575 }
576
577 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
578 process_mode == TextureProcessMode::LL ||
579 process_mode == TextureProcessMode::LLA;
580 const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;
581
582 Node bias;
583 Node lod;
584 switch (process_mode) {
585 case TextureProcessMode::None:
586 break;
587 case TextureProcessMode::LZ:
588 lod = Immediate(0.0f);
589 break;
590 case TextureProcessMode::LB:
591        // If present, the lod or bias is always stored in the register indexed by the gpr20
592        // field, with an offset depending on the usage of the other registers.
593 bias = GetRegister(instr.gpr20.Value() + bias_offset);
594 break;
595 case TextureProcessMode::LL:
596 lod = GetRegister(instr.gpr20.Value() + bias_offset);
597 break;
598 default:
599 UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
600 break;
601 }
602
603 Node4 values;
604 for (u32 element = 0; element < values.size(); ++element) {
605 MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
606 lod, {}, element, index_var};
607 values[element] = Operation(opcode, meta, coords);
608 }
609
610 return values;
611}
612
613Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
614 TextureProcessMode process_mode, bool depth_compare, bool is_array,
615 bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
616 const bool lod_bias_enabled{
617 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
618
619 const bool is_bindless = bindless_reg.has_value();
620
621 u64 parameter_register = instr.gpr20.Value();
622 if (is_bindless) {
623 ++parameter_register;
624 }
625
626 const u32 bias_lod_offset = (is_bindless ? 1 : 0);
627 if (lod_bias_enabled) {
628 ++parameter_register;
629 }
630
631 const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
632 lod_bias_enabled, 4, 5);
633 const auto coord_count = std::get<0>(coord_counts);
634 // When arrays are enabled, the array index is always stored in the gpr8 field
635 const u64 array_register = instr.gpr8.Value();
636 // The first coordinate is stored in gpr8, or gpr8 + 1 when arrays are used
637 const u64 coord_register = array_register + (is_array ? 1 : 0);
638
639 std::vector<Node> coords;
640 for (std::size_t i = 0; i < coord_count; ++i) {
641 coords.push_back(GetRegister(coord_register + i));
642 }
643 // 1D.DC: OpenGL expects an extra coordinate component here, which is ignored.
644 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
645 coords.push_back(Immediate(0.0f));
646 }
647
648 const Node array = is_array ? GetRegister(array_register) : nullptr;
649
650 std::vector<Node> aoffi;
651 if (is_aoffi) {
652 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
653 }
654
655 Node dc;
656 if (depth_compare) {
657 // Depth is always stored in the register indicated by gpr20, or in the next register when
658 // LOD or bias are used
659 dc = GetRegister(parameter_register++);
660 }
661
662 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
663 aoffi, bindless_reg);
664}
665
666Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
667 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
668 const bool lod_bias_enabled =
669 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
670
671 const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
672 lod_bias_enabled, 4, 4);
673 const auto coord_count = std::get<0>(coord_counts);
674
675 // When arrays are enabled, the array index is always stored in the gpr8 field
676 const u64 array_register = instr.gpr8.Value();
677 // The first coordinate is stored in the gpr8 field, or (gpr8 + 1) when arrays are used
678 const u64 coord_register = array_register + (is_array ? 1 : 0);
679 const u64 last_coord_register =
680 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
681 ? static_cast<u64>(instr.gpr20.Value())
682 : coord_register + 1;
683 const u32 bias_offset = coord_count > 2 ? 1 : 0;
684
685 std::vector<Node> coords;
686 for (std::size_t i = 0; i < coord_count; ++i) {
687 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
688 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
689 }
690
691 const Node array = is_array ? GetRegister(array_register) : nullptr;
692
693 Node dc;
694 if (depth_compare) {
695 // Depth is always stored in the register indicated by gpr20, or in the next register when
696 // LOD or bias are used
697 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
698 dc = GetRegister(depth_register);
699 }
700
701 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
702 {});
703}
704
705Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
706 bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
707 ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
708
709 const std::size_t coord_count = GetCoordCount(texture_type);
710
711 // When arrays are enabled, the array index is always stored in the gpr8 field
712 const u64 array_register = instr.gpr8.Value();
713 // The first coordinate is stored in gpr8, or gpr8 + 1 when arrays are used
714 const u64 coord_register = array_register + (is_array ? 1 : 0);
715
716 std::vector<Node> coords;
717 for (std::size_t i = 0; i < coord_count; ++i) {
718 coords.push_back(GetRegister(coord_register + i));
719 }
720
721 u64 parameter_register = instr.gpr20.Value();
722
723 SamplerInfo info;
724 info.type = texture_type;
725 info.is_array = is_array;
726 info.is_shadow = depth_compare;
727
728 Node index_var;
729 const std::optional<SamplerEntry> sampler =
730 is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
731 : GetSampler(instr.sampler, info);
732 Node4 values;
733 if (!sampler) {
734 for (u32 element = 0; element < values.size(); ++element) {
735 values[element] = Immediate(0);
736 }
737 return values;
738 }
739
740 std::vector<Node> aoffi, ptp;
741 if (is_aoffi) {
742 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
743 } else if (is_ptp) {
744 ptp = GetPtpCoordinates(
745 {GetRegister(parameter_register++), GetRegister(parameter_register++)});
746 }
747
748 Node dc;
749 if (depth_compare) {
750 dc = GetRegister(parameter_register++);
751 }
752
753 const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
754 : Immediate(static_cast<u32>(instr.tld4.component));
755
756 for (u32 element = 0; element < values.size(); ++element) {
757 auto coords_copy = coords;
758 MetaTexture meta{
759 *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
760 index_var};
761 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
762 }
763
764 return values;
765}
766
767Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
768 const auto texture_type{instr.tld.texture_type};
769 const bool is_array{instr.tld.is_array != 0};
770 const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
771 const std::size_t coord_count{GetCoordCount(texture_type)};
772
773 u64 gpr8_cursor{instr.gpr8.Value()};
774 const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
775
776 std::vector<Node> coords;
777 coords.reserve(coord_count);
778 for (std::size_t i = 0; i < coord_count; ++i) {
779 coords.push_back(GetRegister(gpr8_cursor++));
780 }
781
782 u64 gpr20_cursor{instr.gpr20.Value()};
783 // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
784 const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
785 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
786 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
787
788 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
789
790 Node4 values;
791 for (u32 element = 0; element < values.size(); ++element) {
792 auto coords_copy = coords;
793 MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
794 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
795 }
796
797 return values;
798}
799
800Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
801 SamplerInfo info;
802 info.type = texture_type;
803 info.is_array = is_array;
804 info.is_shadow = false;
805 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
806
807 const std::size_t type_coord_count = GetCoordCount(texture_type);
808 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
809 const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
810
811 // When arrays are enabled, the array index is always stored in the gpr8 field
812 const u64 array_register = instr.gpr8.Value();
813 // When arrays are used, the coordinates start at gpr20 instead
814 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
815
816 const u64 last_coord_register =
817 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
818 ? static_cast<u64>(instr.gpr20.Value())
819 : coord_register + 1;
820
821 std::vector<Node> coords;
822 for (std::size_t i = 0; i < type_coord_count; ++i) {
823 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
824 coords.push_back(
825 GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
826 }
827
828 const Node array = is_array ? GetRegister(array_register) : nullptr;
829 // When LOD is used, it is always stored in gpr20
830 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
831
832 std::vector<Node> aoffi;
833 if (aoffi_enabled) {
834 aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
835 }
836
837 Node4 values;
838 for (u32 element = 0; element < values.size(); ++element) {
839 auto coords_copy = coords;
840 MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
841 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
842 }
843 return values;
844}
845
846std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
847 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
848 std::size_t max_coords, std::size_t max_inputs) {
849 const std::size_t coord_count = GetCoordCount(texture_type);
850
851 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
852 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
853 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
854 UNIMPLEMENTED_MSG("Unsupported Texture operation");
855 total_coord_count = std::min(total_coord_count, max_coords);
856 }
857 // For 1D.DC, OpenGL uses a vec3, but the second component is ignored later.
858 total_coord_count +=
859 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
860
861 return {coord_count, total_coord_count};
862}
863
864std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
865 bool is_tld4) {
866 const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
867 const u32 size = is_tld4 ? 6 : 4;
868 const s32 wrap_value = is_tld4 ? 32 : 8;
869 const s32 diff_value = is_tld4 ? 64 : 16;
870 const u32 mask = (1U << size) - 1;
871
872 std::vector<Node> aoffi;
873 aoffi.reserve(coord_count);
874
875 const auto aoffi_immediate{
876 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
877 if (!aoffi_immediate) {
878 // Variable access, not supported on AMD.
879 LOG_WARNING(HW_GPU,
880 "AOFFI constant folding failed, some hardware might have graphical issues");
881 for (std::size_t coord = 0; coord < coord_count; ++coord) {
882 const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
883 const Node condition =
884 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
885 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
886 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
887 }
888 return aoffi;
889 }
890
891 for (std::size_t coord = 0; coord < coord_count; ++coord) {
892 s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
893 if (value >= wrap_value) {
894 value -= diff_value;
895 }
896 aoffi.push_back(Immediate(value));
897 }
898 return aoffi;
899}
900
901std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
902 static constexpr u32 num_entries = 8;
903
904 std::vector<Node> ptp;
905 ptp.reserve(num_entries);
906
907 const auto global_size = static_cast<s64>(global_code.size());
908 const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
909 const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
910 if (!low || !high) {
911 for (u32 entry = 0; entry < num_entries; ++entry) {
912 const u32 reg = entry / 4;
913 const u32 offset = entry % 4;
914 const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
915 const Node condition =
916 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
917 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
918 ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
919 }
920 return ptp;
921 }
922
923 const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
924 for (u32 entry = 0; entry < num_entries; ++entry) {
925 s32 value = (immediate >> (entry * 8)) & 0b111111;
926 if (value >= 32) {
927 value -= 64;
928 }
929 ptp.push_back(Immediate(value));
930 }
931
932 return ptp;
933}
934
935} // namespace VideoCommon::Shader
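For reference, GetAoffiCoordinates and GetPtpCoordinates above unpack several small signed offsets from one immediate: regular AOFFI uses 4-bit fields at bit positions 0/4/8 that wrap at 8, while TLD4 uses 6-bit fields at 0/8/16 that wrap at 32. A minimal standalone sketch of that unpacking, with hypothetical names and no IR nodes, would be:

#include <array>
#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch of the constant-folded path of GetAoffiCoordinates: extract each packed
// field and sign-extend it by subtracting 2^size once it crosses the wrap value.
// Names and main() are illustrative only.
static std::vector<int32_t> UnpackAoffi(uint32_t packed, std::size_t coord_count, bool is_tld4) {
    const std::array<uint32_t, 3> positions =
        is_tld4 ? std::array<uint32_t, 3>{0, 8, 16} : std::array<uint32_t, 3>{0, 4, 8};
    const uint32_t size = is_tld4 ? 6 : 4;
    const int32_t wrap = is_tld4 ? 32 : 8;  // first value that wraps negative
    const int32_t diff = is_tld4 ? 64 : 16; // 2^size
    const uint32_t mask = (1U << size) - 1;

    std::vector<int32_t> offsets;
    offsets.reserve(coord_count);
    for (std::size_t i = 0; i < coord_count; ++i) {
        int32_t value = static_cast<int32_t>((packed >> positions[i]) & mask);
        if (value >= wrap) {
            value -= diff; // e.g. 0xF becomes -1 in the 4-bit encoding
        }
        offsets.push_back(value);
    }
    return offsets;
}

int main() {
    // 2D non-TLD4 example: x offset 0xF (-1), y offset 2.
    for (const int32_t offset : UnpackAoffi(0x2F, 2, false)) {
        std::printf("%d\n", offset); // prints -1 then 2
    }
}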
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::VideoType;
18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
21
22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
23 const Instruction instr = {program_code[pc]};
24 const auto opcode = OpCode::Decode(instr);
25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
31 const Node op_a =
32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
33 instr.video.type_a, instr.video.byte_height_a);
34 const Node op_b = [this, instr] {
35 if (instr.video.use_register_b) {
36 return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
37 instr.video.signed_b, instr.video.type_b,
38 instr.video.byte_height_b);
39 }
40 if (instr.video.signed_b) {
41 const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
42 return Immediate(static_cast<u32>(imm));
43 } else {
44 return Immediate(instr.alu.GetImm20_16());
45 }
46 }();
47
48 switch (opcode->get().GetId()) {
49 case OpCode::Id::VMAD: {
50 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
51 const Node op_c = GetRegister(instr.gpr39);
52
53 Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
54 value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
55
56 if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
57 const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
58 value =
59 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
60 }
61
62 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
63 SetRegister(bb, instr.gpr0, value);
64 break;
65 }
66 case OpCode::Id::VSETP: {
67 // We can't use the constant predicate as destination.
68 ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
69
70 const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
71 const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
72 const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
73
74 const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
75
76 // Set the primary predicate to the result of Predicate OP SecondPredicate
77 SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
78
79 if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
80 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
81 // if enabled
82 const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
83 SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
84 }
85 break;
86 }
87 default:
88 UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
89 }
90
91 return pc;
92}
93
94Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
95 u64 byte_height) {
96 if (!is_chunk) {
97 return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
98 }
99
100 switch (type) {
101 case VideoType::Size16_Low:
102 return BitfieldExtract(op, 0, 16);
103 case VideoType::Size16_High:
104 return BitfieldExtract(op, 16, 16);
105 case VideoType::Size32:
106 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
107 // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
108 UNIMPLEMENTED();
109 return Immediate(0);
110 case VideoType::Invalid:
111 UNREACHABLE_MSG("Invalid instruction encoding");
112 return Immediate(0);
113 default:
114 UNREACHABLE();
115 return Immediate(0);
116 }
117}
118
119void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
120 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
121 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
124 UNIMPLEMENTED_IF(instr.vmnmx.sat);
125 UNIMPLEMENTED_IF(instr.generates_cc);
126
127 Node op_a = GetRegister(instr.gpr8);
128 Node op_b = GetRegister(instr.gpr20);
129 Node op_c = GetRegister(instr.gpr39);
130
131 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
132 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
133
134 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
135 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
136
137 switch (instr.vmnmx.operation) {
138 case VmnmxOperation::Mrg_16H:
139 value = BitfieldInsert(move(op_c), move(value), 16, 16);
140 break;
141 case VmnmxOperation::Mrg_16L:
142 value = BitfieldInsert(move(op_c), move(value), 0, 16);
143 break;
144 case VmnmxOperation::Mrg_8B0:
145 value = BitfieldInsert(move(op_c), move(value), 0, 8);
146 break;
147 case VmnmxOperation::Mrg_8B2:
148 value = BitfieldInsert(move(op_c), move(value), 16, 8);
149 break;
150 case VmnmxOperation::Acc:
151 value = Operation(OperationCode::IAdd, move(value), move(op_c));
152 break;
153 case VmnmxOperation::Min:
154 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
155 break;
156 case VmnmxOperation::Max:
157 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
158 break;
159 case VmnmxOperation::Nop:
160 break;
161 default:
162 UNREACHABLE();
163 break;
164 }
165
166 SetRegister(bb, instr.gpr0, move(value));
167}
168
169} // namespace VideoCommon::Shader
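As a point of reference, GetVideoOperand above narrows a 32-bit register to the byte or half-word lane that the video instructions operate on. A plain-integer sketch of that lane selection for unsigned sources, with hypothetical names, would be:

#include <cstdint>
#include <cstdio>

// Picks the operand lane the way GetVideoOperand does for unsigned inputs:
// either one byte selected by byte_height, or the low/high 16-bit half.
static uint32_t SelectVideoLane(uint32_t reg, bool is_byte, uint32_t byte_height, bool high_half) {
    if (is_byte) {
        return (reg >> (byte_height * 8)) & 0xFFu;
    }
    return high_half ? (reg >> 16) : (reg & 0xFFFFu);
}

int main() {
    const uint32_t reg = 0xAABBCCDD;
    std::printf("0x%X\n", SelectVideoLane(reg, true, 2, false)); // 0xBB
    std::printf("0x%X\n", SelectVideoLane(reg, false, 0, true)); // 0xAABB
}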
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
deleted file mode 100644
index 37433d783..000000000
--- a/src/video_core/shader/decode/warp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation;
18
19namespace {
20
21OperationCode GetOperationCode(VoteOperation vote_op) {
22 switch (vote_op) {
23 case VoteOperation::All:
24 return OperationCode::VoteAll;
25 case VoteOperation::Any:
26 return OperationCode::VoteAny;
27 case VoteOperation::Eq:
28 return OperationCode::VoteEqual;
29 default:
30 UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
31 return OperationCode::VoteAll;
32 }
33}
34
35} // Anonymous namespace
36
37u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
38 const Instruction instr = {program_code[pc]};
39 const auto opcode = OpCode::Decode(instr);
40
41 // Signal the backend that this shader uses warp instructions.
42 uses_warps = true;
43
44 switch (opcode->get().GetId()) {
45 case OpCode::Id::VOTE: {
46 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
47 const Node active = Operation(OperationCode::BallotThread, value);
48 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
49 SetRegister(bb, instr.gpr0, active);
50 SetPredicate(bb, instr.vote.dest_pred, vote);
51 break;
52 }
53 case OpCode::Id::SHFL: {
54 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
55 : GetRegister(instr.gpr39);
56 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
57 : GetRegister(instr.gpr20);
58
59 Node thread_id = Operation(OperationCode::ThreadId);
60 Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
61 Node seg_mask = BitfieldExtract(mask, 8, 16);
62
63 Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
64 Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
65 Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
66 Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
67
68 Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
69 switch (instr.shfl.operation) {
70 case ShuffleOperation::Idx:
71 return Operation(OperationCode::IBitwiseOr,
72 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
73 min_thread_id);
74 case ShuffleOperation::Down:
75 return Operation(OperationCode::IAdd, thread_id, index);
76 case ShuffleOperation::Up:
77 return Operation(OperationCode::IAdd, thread_id,
78 Operation(OperationCode::INegate, index));
79 case ShuffleOperation::Bfly:
80 return Operation(OperationCode::IBitwiseXor, thread_id, index);
81 }
82 UNREACHABLE();
83 return Immediate(0U);
84 }();
85
86 Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
87 if (instr.shfl.operation == ShuffleOperation::Up) {
88 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
89 } else {
90 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
91 }
92 }();
93
94 SetPredicate(bb, instr.shfl.pred48, in_bounds);
95 SetRegister(
96 bb, instr.gpr0,
97 Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
98 break;
99 }
100 case OpCode::Id::FSWZADD: {
101 UNIMPLEMENTED_IF(instr.fswzadd.ndv);
102
103 Node op_a = GetRegister(instr.gpr8);
104 Node op_b = GetRegister(instr.gpr20);
105 Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
106 SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
107 break;
108 }
109 default:
110 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
111 break;
112 }
113
114 return pc;
115}
116
117} // namespace VideoCommon::Shader
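The SHFL case above splits the c operand into a 5-bit clamp and a 16-bit segment mask and derives the source lane plus an in-bounds predicate from them. A scalar sketch of the Idx-mode arithmetic, with hypothetical names, would be:

#include <cstdint>
#include <cstdio>

struct ShflResult {
    uint32_t src_lane;
    bool in_bounds;
};

// Mirrors the Idx-mode lane selection of DecodeWarp's SHFL handling on plain integers.
static ShflResult ShuffleIdx(uint32_t thread_id, uint32_t index, uint32_t c_operand) {
    const uint32_t clamp = c_operand & 0x1Fu;
    const uint32_t seg_mask = (c_operand >> 8) & 0xFFFFu;
    const uint32_t min_id = thread_id & seg_mask;
    const uint32_t max_id = min_id | (clamp & ~seg_mask);
    const uint32_t src = (index & ~seg_mask) | min_id;
    return {src, src <= max_id};
}

int main() {
    const ShflResult r = ShuffleIdx(/*thread_id=*/13, /*index=*/5, /*c_operand=*/0x1F);
    std::printf("src=%u in_bounds=%d\n", r.src_lane, r.in_bounds ? 1 : 0); // src=5 in_bounds=1
}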
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
deleted file mode 100644
index 233b8fa42..000000000
--- a/src/video_core/shader/decode/xmad.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::PredCondition;
16
17u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 UNIMPLEMENTED_IF(instr.xmad.sign_a);
22 UNIMPLEMENTED_IF(instr.xmad.sign_b);
23 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
24 "Condition codes generation in XMAD is not implemented");
25
26 Node op_a = GetRegister(instr.gpr8);
27
28 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
29 UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
30 const bool is_signed_a = instr.xmad.sign_a == 1;
31 const bool is_signed_b = instr.xmad.sign_b == 1;
32 const bool is_signed_c = is_signed_a;
33
34 auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
35 op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
36 switch (opcode->get().GetId()) {
37 case OpCode::Id::XMAD_CR:
38 return {instr.xmad.merge_56,
39 instr.xmad.product_shift_left_second,
40 instr.xmad.high_b,
41 instr.xmad.mode_cbf,
42 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
43 GetRegister(instr.gpr39)};
44 case OpCode::Id::XMAD_RR:
45 return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
46 instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
47 case OpCode::Id::XMAD_RC:
48 return {false,
49 false,
50 instr.xmad.high_b,
51 instr.xmad.mode_cbf,
52 GetRegister(instr.gpr39),
53 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
54 case OpCode::Id::XMAD_IMM:
55 return {instr.xmad.merge_37,
56 instr.xmad.product_shift_left,
57 false,
58 instr.xmad.mode,
59 Immediate(static_cast<u32>(instr.xmad.imm20_16)),
60 GetRegister(instr.gpr39)};
61 default:
62 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
63 return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
64 }
65 }();
66
67 op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
68 instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
69
70 const Node original_b = op_b_binding;
71 const Node op_b =
72 SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
73 is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
74
75 // sign_a and sign_b were already checked for mismatch above, so either one can be used here.
76 Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
77 if (is_psl) {
78 product =
79 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
80 }
81 SetTemporary(bb, 0, product);
82 product = GetTemporary(0);
83
84 Node original_c = op_c;
85 const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compile error
86 op_c = [&] {
87 switch (set_mode) {
88 case Tegra::Shader::XmadMode::None:
89 return original_c;
90 case Tegra::Shader::XmadMode::CLo:
91 return BitfieldExtract(std::move(original_c), 0, 16);
92 case Tegra::Shader::XmadMode::CHi:
93 return BitfieldExtract(std::move(original_c), 16, 16);
94 case Tegra::Shader::XmadMode::CBcc: {
95 Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
96 original_b, Immediate(16));
97 return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
98 std::move(shifted_b));
99 }
100 case Tegra::Shader::XmadMode::CSfu: {
101 const Node comp_a =
102 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
103 const Node comp_b =
104 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
105 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
106
107 const Node comp_minus_a = GetPredicateComparisonInteger(
108 PredCondition::NE, is_signed_a,
109 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
110 Immediate(0x80000000)),
111 Immediate(0));
112 const Node comp_minus_b = GetPredicateComparisonInteger(
113 PredCondition::NE, is_signed_b,
114 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
115 Immediate(0x80000000)),
116 Immediate(0));
117
118 Node new_c = Operation(
119 OperationCode::Select, comp_minus_a,
120 SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
121 original_c);
122 new_c = Operation(
123 OperationCode::Select, comp_minus_b,
124 SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
125 std::move(new_c));
126
127 return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
128 }
129 default:
130 UNREACHABLE();
131 return Immediate(0);
132 }
133 }();
134
135 SetTemporary(bb, 1, op_c);
136 op_c = GetTemporary(1);
137
138 // TODO(Rodrigo): Use an appropriate sign for this operation
139 Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
140 SetTemporary(bb, 2, sum);
141 sum = GetTemporary(2);
142 if (is_merge) {
143 const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
144 Immediate(0), Immediate(16));
145 const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
146 Immediate(16));
147 sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
148 }
149
150 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
151 SetRegister(bb, instr.gpr0, std::move(sum));
152
153 return pc;
154}
155
156} // namespace VideoCommon::Shader
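XMAD is, at its core, a 16x16-bit multiply with a 32-bit accumulate; the decoder above models the halfword selection, the optional PSL shift and the various C-operand modes with IR nodes. An unsigned, mode-None sketch of just that core, with hypothetical names, would be:

#include <cstdint>
#include <cstdio>

// Unsigned XMAD core: pick one 16-bit half of each source, multiply,
// optionally shift the product left by 16 (PSL), then add the C operand.
static uint32_t XmadUnsigned(uint32_t a, bool high_a, uint32_t b, bool high_b, uint32_t c,
                             bool psl) {
    const uint32_t half_a = high_a ? (a >> 16) : (a & 0xFFFFu);
    const uint32_t half_b = high_b ? (b >> 16) : (b & 0xFFFFu);
    uint32_t product = half_a * half_b;
    if (psl) {
        product <<= 16;
    }
    return product + c;
}

int main() {
    // 3 * 4 + 5 using the low halves of both sources.
    std::printf("%u\n", XmadUnsigned(3, false, 4, false, 5, false)); // 17
}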
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
deleted file mode 100644
index 2647865d4..000000000
--- a/src/video_core/shader/expr.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <variant>
7
8#include "video_core/shader/expr.h"
9
10namespace VideoCommon::Shader {
11namespace {
12bool ExprIsBoolean(const Expr& expr) {
13 return std::holds_alternative<ExprBoolean>(*expr);
14}
15
16bool ExprBooleanGet(const Expr& expr) {
17 return std::get_if<ExprBoolean>(expr.get())->value;
18}
19} // Anonymous namespace
20
21bool ExprAnd::operator==(const ExprAnd& b) const {
22 return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
23}
24
25bool ExprAnd::operator!=(const ExprAnd& b) const {
26 return !operator==(b);
27}
28
29bool ExprOr::operator==(const ExprOr& b) const {
30 return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
31}
32
33bool ExprOr::operator!=(const ExprOr& b) const {
34 return !operator==(b);
35}
36
37bool ExprNot::operator==(const ExprNot& b) const {
38 return *operand1 == *b.operand1;
39}
40
41bool ExprNot::operator!=(const ExprNot& b) const {
42 return !operator==(b);
43}
44
45Expr MakeExprNot(Expr first) {
46 if (std::holds_alternative<ExprNot>(*first)) {
47 return std::get_if<ExprNot>(first.get())->operand1;
48 }
49 return MakeExpr<ExprNot>(std::move(first));
50}
51
52Expr MakeExprAnd(Expr first, Expr second) {
53 if (ExprIsBoolean(first)) {
54 return ExprBooleanGet(first) ? second : first;
55 }
56 if (ExprIsBoolean(second)) {
57 return ExprBooleanGet(second) ? first : second;
58 }
59 return MakeExpr<ExprAnd>(std::move(first), std::move(second));
60}
61
62Expr MakeExprOr(Expr first, Expr second) {
63 if (ExprIsBoolean(first)) {
64 return ExprBooleanGet(first) ? first : second;
65 }
66 if (ExprIsBoolean(second)) {
67 return ExprBooleanGet(second) ? second : first;
68 }
69 return MakeExpr<ExprOr>(std::move(first), std::move(second));
70}
71
72bool ExprAreEqual(const Expr& first, const Expr& second) {
73 return (*first) == (*second);
74}
75
76bool ExprAreOpposite(const Expr& first, const Expr& second) {
77 if (std::holds_alternative<ExprNot>(*first)) {
78 return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
79 }
80 if (std::holds_alternative<ExprNot>(*second)) {
81 return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
82 }
83 return false;
84}
85
86bool ExprIsTrue(const Expr& first) {
87 if (ExprIsBoolean(first)) {
88 return ExprBooleanGet(first);
89 }
90 return false;
91}
92
93} // namespace VideoCommon::Shader
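These helpers fold boolean constants eagerly: MakeExprAnd(true, x) collapses to x, MakeExprOr(true, x) to the true constant, and a double MakeExprNot folds back to the original expression. A small usage sketch, assuming it is compiled together with the expr.h header removed below, would be:

#include <cassert>

#include "video_core/shader/expr.h"

using namespace VideoCommon::Shader;

int main() {
    const Expr t = MakeExpr<ExprBoolean>(true);
    const Expr p = MakeExpr<ExprPredicate>(3u);

    // AND with a true constant folds to the other operand.
    assert(ExprAreEqual(MakeExprAnd(t, p), p));
    // OR with a true constant folds to the constant itself.
    assert(ExprIsTrue(MakeExprOr(t, p)));
    // Double negation folds back to the original expression.
    assert(ExprAreEqual(MakeExprNot(MakeExprNot(p)), p));
}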
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
deleted file mode 100644
index cda284c72..000000000
--- a/src/video_core/shader/expr.h
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <variant>
9
10#include "video_core/engines/shader_bytecode.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::ConditionCode;
15using Tegra::Shader::Pred;
16
17class ExprAnd;
18class ExprBoolean;
19class ExprCondCode;
20class ExprGprEqual;
21class ExprNot;
22class ExprOr;
23class ExprPredicate;
24class ExprVar;
25
26using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
27 ExprBoolean, ExprGprEqual>;
28using Expr = std::shared_ptr<ExprData>;
29
30class ExprAnd final {
31public:
32 explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
33
34 bool operator==(const ExprAnd& b) const;
35 bool operator!=(const ExprAnd& b) const;
36
37 Expr operand1;
38 Expr operand2;
39};
40
41class ExprOr final {
42public:
43 explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
44
45 bool operator==(const ExprOr& b) const;
46 bool operator!=(const ExprOr& b) const;
47
48 Expr operand1;
49 Expr operand2;
50};
51
52class ExprNot final {
53public:
54 explicit ExprNot(Expr a) : operand1{std::move(a)} {}
55
56 bool operator==(const ExprNot& b) const;
57 bool operator!=(const ExprNot& b) const;
58
59 Expr operand1;
60};
61
62class ExprVar final {
63public:
64 explicit ExprVar(u32 index) : var_index{index} {}
65
66 bool operator==(const ExprVar& b) const {
67 return var_index == b.var_index;
68 }
69
70 bool operator!=(const ExprVar& b) const {
71 return !operator==(b);
72 }
73
74 u32 var_index;
75};
76
77class ExprPredicate final {
78public:
79 explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
80
81 bool operator==(const ExprPredicate& b) const {
82 return predicate == b.predicate;
83 }
84
85 bool operator!=(const ExprPredicate& b) const {
86 return !operator==(b);
87 }
88
89 u32 predicate;
90};
91
92class ExprCondCode final {
93public:
94 explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
95
96 bool operator==(const ExprCondCode& b) const {
97 return cc == b.cc;
98 }
99
100 bool operator!=(const ExprCondCode& b) const {
101 return !operator==(b);
102 }
103
104 ConditionCode cc;
105};
106
107class ExprBoolean final {
108public:
109 explicit ExprBoolean(bool val) : value{val} {}
110
111 bool operator==(const ExprBoolean& b) const {
112 return value == b.value;
113 }
114
115 bool operator!=(const ExprBoolean& b) const {
116 return !operator==(b);
117 }
118
119 bool value;
120};
121
122class ExprGprEqual final {
123public:
124 explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
125
126 bool operator==(const ExprGprEqual& b) const {
127 return gpr == b.gpr && value == b.value;
128 }
129
130 bool operator!=(const ExprGprEqual& b) const {
131 return !operator==(b);
132 }
133
134 u32 gpr;
135 u32 value;
136};
137
138template <typename T, typename... Args>
139Expr MakeExpr(Args&&... args) {
140 static_assert(std::is_convertible_v<T, ExprData>);
141 return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
142}
143
144bool ExprAreEqual(const Expr& first, const Expr& second);
145
146bool ExprAreOpposite(const Expr& first, const Expr& second);
147
148Expr MakeExprNot(Expr first);
149
150Expr MakeExprAnd(Expr first, Expr second);
151
152Expr MakeExprOr(Expr first, Expr second);
153
154bool ExprIsTrue(const Expr& first);
155
156} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
deleted file mode 100644
index e18ccba8e..000000000
--- a/src/video_core/shader/memory_util.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7
8#include <boost/container_hash/hash.hpp>
9
10#include "common/common_types.h"
11#include "core/core.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/memory_manager.h"
14#include "video_core/shader/memory_util.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18
19GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
20 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
21 const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
22 return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
23}
24
25bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
26 // Sched instructions appear once every 4 instructions.
27 constexpr std::size_t SchedPeriod = 4;
28 const std::size_t absolute_offset = offset - main_offset;
29 return (absolute_offset % SchedPeriod) == 0;
30}
31
32std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
33 // This is the encoded version of BRA that jumps to itself. All Nvidia
34 // shaders end with one.
35 static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
36 static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
37
38 const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
39 std::size_t offset = start_offset;
40 while (offset < program.size()) {
41 const u64 instruction = program[offset];
42 if (!IsSchedInstruction(offset, start_offset)) {
43 if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
44 // End on Maxwell's "nop" instruction
45 break;
46 }
47 if (instruction == 0) {
48 break;
49 }
50 }
51 ++offset;
52 }
53 // The last instruction is included in the program size
54 return std::min(offset + 1, program.size());
55}
56
57ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
58 const u8* host_ptr, bool is_compute) {
59 ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
60 ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
61 memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
62 code.resize(CalculateProgramSize(code, is_compute));
63 return code;
64}
65
66u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
67 const ProgramCode& code_b) {
68 size_t unique_identifier = boost::hash_value(code);
69 if (is_a) {
70 // VertexA programs combine two program streams
71 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
72 }
73 return static_cast<u64>(unique_identifier);
74}
75
76} // namespace VideoCommon::Shader
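CalculateProgramSize above walks the stream from the stage entry point, skips the sched control word that occupies every fourth slot, and stops at the self-jumping BRA (or an all-zero word) that terminates Maxwell shaders. A self-contained sketch of the same scan, with the constants copied from the function and hypothetical names elsewhere, would be:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Same terminator scan as CalculateProgramSize, applied to a toy instruction stream.
static std::size_t ScanProgramSize(const std::vector<uint64_t>& program, std::size_t main_offset) {
    constexpr uint64_t SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
    constexpr uint64_t MASK = 0xFFFFFFFFFF7FFFFFULL;
    constexpr std::size_t SCHED_PERIOD = 4; // one sched control word every four slots

    std::size_t offset = main_offset;
    while (offset < program.size()) {
        const uint64_t instruction = program[offset];
        const bool is_sched = ((offset - main_offset) % SCHED_PERIOD) == 0;
        if (!is_sched && ((instruction & MASK) == SELF_JUMPING_BRANCH || instruction == 0)) {
            break;
        }
        ++offset;
    }
    return std::min(offset + 1, program.size()); // the terminator counts towards the size
}

int main() {
    // sched, three instructions, sched, self-jumping BRA terminator.
    const std::vector<uint64_t> program{0, 0x111, 0x222, 0x333, 0, 0xE2400FFFFF07000FULL};
    std::printf("%zu\n", ScanProgramSize(program, 0)); // prints 6
}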
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
deleted file mode 100644
index 4624d38e6..000000000
--- a/src/video_core/shader/memory_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13
14namespace Tegra {
15class MemoryManager;
16}
17
18namespace VideoCommon::Shader {
19
20using ProgramCode = std::vector<u64>;
21
22constexpr u32 STAGE_MAIN_OFFSET = 10;
23constexpr u32 KERNEL_MAIN_OFFSET = 0;
24
25/// Gets the address for the specified shader stage program
26GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
27 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
28
29/// Returns whether the current instruction offset is a scheduler instruction
30bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
31
32/// Calculates the size of a program stream
33std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
34
35/// Gets the shader program code from memory for the specified address
36ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
37 const u8* host_ptr, bool is_compute);
38
39/// Hashes one (or two) program streams
40u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
41 const ProgramCode& code_b = {});
42
43} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
deleted file mode 100644
index b54d33763..000000000
--- a/src/video_core/shader/node.h
+++ /dev/null
@@ -1,701 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <optional>
11#include <string>
12#include <tuple>
13#include <utility>
14#include <variant>
15#include <vector>
16
17#include "common/common_types.h"
18#include "video_core/engines/shader_bytecode.h"
19
20namespace VideoCommon::Shader {
21
22enum class OperationCode {
23 Assign, /// (float& dest, float src) -> void
24
25 Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
26
27 FAdd, /// (MetaArithmetic, float a, float b) -> float
28 FMul, /// (MetaArithmetic, float a, float b) -> float
29 FDiv, /// (MetaArithmetic, float a, float b) -> float
30 FFma, /// (MetaArithmetic, float a, float b, float c) -> float
31 FNegate, /// (MetaArithmetic, float a) -> float
32 FAbsolute, /// (MetaArithmetic, float a) -> float
33 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
34 FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
35 FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
36 FMin, /// (MetaArithmetic, float a, float b) -> float
37 FMax, /// (MetaArithmetic, float a, float b) -> float
38 FCos, /// (MetaArithmetic, float a) -> float
39 FSin, /// (MetaArithmetic, float a) -> float
40 FExp2, /// (MetaArithmetic, float a) -> float
41 FLog2, /// (MetaArithmetic, float a) -> float
42 FInverseSqrt, /// (MetaArithmetic, float a) -> float
43 FSqrt, /// (MetaArithmetic, float a) -> float
44 FRoundEven, /// (MetaArithmetic, float a) -> float
45 FFloor, /// (MetaArithmetic, float a) -> float
46 FCeil, /// (MetaArithmetic, float a) -> float
47 FTrunc, /// (MetaArithmetic, float a) -> float
48 FCastInteger, /// (MetaArithmetic, int a) -> float
49 FCastUInteger, /// (MetaArithmetic, uint a) -> float
50 FSwizzleAdd, /// (float a, float b, uint mask) -> float
51
52 IAdd, /// (MetaArithmetic, int a, int b) -> int
53 IMul, /// (MetaArithmetic, int a, int b) -> int
54 IDiv, /// (MetaArithmetic, int a, int b) -> int
55 INegate, /// (MetaArithmetic, int a) -> int
56 IAbsolute, /// (MetaArithmetic, int a) -> int
57 IMin, /// (MetaArithmetic, int a, int b) -> int
58 IMax, /// (MetaArithmetic, int a, int b) -> int
59 ICastFloat, /// (MetaArithmetic, float a) -> int
60 ICastUnsigned, /// (MetaArithmetic, uint a) -> int
61 ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
62 ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
63 IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
64 IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
65 IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
66 IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
67 IBitwiseNot, /// (MetaArithmetic, int a) -> int
68 IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
69 IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int
70 IBitCount, /// (MetaArithmetic, int) -> int
71 IBitMSB, /// (MetaArithmetic, int) -> int
72
73 UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
74 UMul, /// (MetaArithmetic, uint a, uint b) -> uint
75 UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
76 UMin, /// (MetaArithmetic, uint a, uint b) -> uint
77 UMax, /// (MetaArithmetic, uint a, uint b) -> uint
78 UCastFloat, /// (MetaArithmetic, float a) -> uint
79 UCastSigned, /// (MetaArithmetic, int a) -> uint
80 ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
81 ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
82 UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
83 UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
84 UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
85 UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
86 UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
87 UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
88 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
89 UBitCount, /// (MetaArithmetic, uint) -> uint
90 UBitMSB, /// (MetaArithmetic, uint) -> uint
91
92 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
93 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
94 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
95 HAbsolute, /// (f16vec2 a) -> f16vec2
96 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
97 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
98 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
99 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
100 HMergeF32, /// (f16vec2 src) -> float
101 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
102 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
103 HPack2, /// (float a, float b) -> f16vec2
104
105 LogicalAssign, /// (bool& dst, bool src) -> void
106 LogicalAnd, /// (bool a, bool b) -> bool
107 LogicalOr, /// (bool a, bool b) -> bool
108 LogicalXor, /// (bool a, bool b) -> bool
109 LogicalNegate, /// (bool a) -> bool
110 LogicalPick2, /// (bool2 pair, uint index) -> bool
111 LogicalAnd2, /// (bool2 a) -> bool
112
113 LogicalFOrdLessThan, /// (float a, float b) -> bool
114 LogicalFOrdEqual, /// (float a, float b) -> bool
115 LogicalFOrdLessEqual, /// (float a, float b) -> bool
116 LogicalFOrdGreaterThan, /// (float a, float b) -> bool
117 LogicalFOrdNotEqual, /// (float a, float b) -> bool
118 LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
119 LogicalFOrdered, /// (float a, float b) -> bool
120 LogicalFUnordered, /// (float a, float b) -> bool
121 LogicalFUnordLessThan, /// (float a, float b) -> bool
122 LogicalFUnordEqual, /// (float a, float b) -> bool
123 LogicalFUnordLessEqual, /// (float a, float b) -> bool
124 LogicalFUnordGreaterThan, /// (float a, float b) -> bool
125 LogicalFUnordNotEqual, /// (float a, float b) -> bool
126 LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
127
128 LogicalILessThan, /// (int a, int b) -> bool
129 LogicalIEqual, /// (int a, int b) -> bool
130 LogicalILessEqual, /// (int a, int b) -> bool
131 LogicalIGreaterThan, /// (int a, int b) -> bool
132 LogicalINotEqual, /// (int a, int b) -> bool
133 LogicalIGreaterEqual, /// (int a, int b) -> bool
134
135 LogicalULessThan, /// (uint a, uint b) -> bool
136 LogicalUEqual, /// (uint a, uint b) -> bool
137 LogicalULessEqual, /// (uint a, uint b) -> bool
138 LogicalUGreaterThan, /// (uint a, uint b) -> bool
139 LogicalUNotEqual, /// (uint a, uint b) -> bool
140 LogicalUGreaterEqual, /// (uint a, uint b) -> bool
141
142 LogicalAddCarry, /// (uint a, uint b) -> bool
143
144 Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
145 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
146 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
147 Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
148 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
149 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
150 Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
151 Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
152 Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
153 Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
154 Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
155 Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
156
157 Texture, /// (MetaTexture, float[N] coords) -> float4
158 TextureLod, /// (MetaTexture, float[N] coords) -> float4
159 TextureGather, /// (MetaTexture, float[N] coords) -> float4
160 TextureQueryDimensions, /// (MetaTexture, float a) -> float4
161 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
162 TexelFetch, /// (MetaTexture, int[N], int) -> float4
163 TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
164
165 ImageLoad, /// (MetaImage, int[N] coords) -> void
166 ImageStore, /// (MetaImage, int[N] coords) -> void
167
168 AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
169 AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
170 AtomicImageOr, /// (MetaImage, int[N] coords) -> void
171 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
172 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
173
174 AtomicUExchange, /// (memory, uint) -> uint
175 AtomicUAdd, /// (memory, uint) -> uint
176 AtomicUMin, /// (memory, uint) -> uint
177 AtomicUMax, /// (memory, uint) -> uint
178 AtomicUAnd, /// (memory, uint) -> uint
179 AtomicUOr, /// (memory, uint) -> uint
180 AtomicUXor, /// (memory, uint) -> uint
181
182 AtomicIExchange, /// (memory, int) -> int
183 AtomicIAdd, /// (memory, int) -> int
184 AtomicIMin, /// (memory, int) -> int
185 AtomicIMax, /// (memory, int) -> int
186 AtomicIAnd, /// (memory, int) -> int
187 AtomicIOr, /// (memory, int) -> int
188 AtomicIXor, /// (memory, int) -> int
189
190 ReduceUAdd, /// (memory, uint) -> void
191 ReduceUMin, /// (memory, uint) -> void
192 ReduceUMax, /// (memory, uint) -> void
193 ReduceUAnd, /// (memory, uint) -> void
194 ReduceUOr, /// (memory, uint) -> void
195 ReduceUXor, /// (memory, uint) -> void
196
197 ReduceIAdd, /// (memory, int) -> void
198 ReduceIMin, /// (memory, int) -> void
199 ReduceIMax, /// (memory, int) -> void
200 ReduceIAnd, /// (memory, int) -> void
201 ReduceIOr, /// (memory, int) -> void
202 ReduceIXor, /// (memory, int) -> void
203
204 Branch, /// (uint branch_target) -> void
205 BranchIndirect, /// (uint branch_target) -> void
206 PushFlowStack, /// (uint branch_target) -> void
207 PopFlowStack, /// () -> void
208 Exit, /// () -> void
209 Discard, /// () -> void
210
211 EmitVertex, /// () -> void
212 EndPrimitive, /// () -> void
213
214 InvocationId, /// () -> int
215 YNegate, /// () -> float
216 LocalInvocationIdX, /// () -> uint
217 LocalInvocationIdY, /// () -> uint
218 LocalInvocationIdZ, /// () -> uint
219 WorkGroupIdX, /// () -> uint
220 WorkGroupIdY, /// () -> uint
221 WorkGroupIdZ, /// () -> uint
222
223 BallotThread, /// (bool) -> uint
224 VoteAll, /// (bool) -> bool
225 VoteAny, /// (bool) -> bool
226 VoteEqual, /// (bool) -> bool
227
228 ThreadId, /// () -> uint
229 ThreadEqMask, /// () -> uint
230 ThreadGeMask, /// () -> uint
231 ThreadGtMask, /// () -> uint
232 ThreadLeMask, /// () -> uint
233 ThreadLtMask, /// () -> uint
234 ShuffleIndexed, /// (uint value, uint index) -> uint
235
236 Barrier, /// () -> void
237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
239
240 Amount,
241};
242
243enum class InternalFlag {
244 Zero = 0,
245 Sign = 1,
246 Carry = 2,
247 Overflow = 3,
248 Amount = 4,
249};
250
251enum class MetaStackClass {
252 Ssy,
253 Pbk,
254};
255
256class OperationNode;
257class ConditionalNode;
258class GprNode;
259class CustomVarNode;
260class ImmediateNode;
261class InternalFlagNode;
262class PredicateNode;
263class AbufNode;
264class CbufNode;
265class LmemNode;
266class PatchNode;
267class SmemNode;
268class GmemNode;
269class CommentNode;
270
271using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
272 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
273 LmemNode, SmemNode, GmemNode, CommentNode>;
274using Node = std::shared_ptr<NodeData>;
275using Node4 = std::array<Node, 4>;
276using NodeBlock = std::vector<Node>;
277
278struct ArraySamplerNode;
279struct BindlessSamplerNode;
280struct SeparateSamplerNode;
281
282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
283using TrackSampler = std::shared_ptr<TrackSamplerData>;
284
285struct SamplerEntry {
286 /// Bound samplers constructor
287 explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
288 bool is_shadow_, bool is_buffer_, bool is_indexed_)
289 : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
290 is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
291
292 /// Separate sampler constructor
293 explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
295 bool is_buffer_)
296 : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
298 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
299
300 /// Bindless samplers constructor
301 explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
302 bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
303 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
304 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
305 }
306
307 u32 index = 0; ///< Emulated index given for this sampler.
308 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
309 u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
310 u32 buffer = 0; ///< Buffer where the bindless sampler is read.
311 u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
312 u32 size = 1; ///< Size of the sampler.
313
314 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
315 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
316 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
317 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
318 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
319 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
320 bool is_separated = false; ///< Whether the image and sampler are separated or not.
321};
322
323/// Represents a tracked bindless sampler read from a direct const buffer
324struct ArraySamplerNode {
325 u32 index;
326 u32 base_offset;
327 u32 bindless_var;
328};
329
330/// Represents a tracked separate sampler image pair that was folded statically
331struct SeparateSamplerNode {
332 std::pair<u32, u32> indices;
333 std::pair<u32, u32> offsets;
334};
335
336/// Represents a tracked bindless sampler read from a direct const buffer
337struct BindlessSamplerNode {
338 u32 index;
339 u32 offset;
340};
341
342struct ImageEntry {
343public:
344 /// Bound images constructor
345 explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
346 : index{index_}, offset{offset_}, type{type_} {}
347
348 /// Bindless samplers constructor
349 explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
350 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
351
352 void MarkWrite() {
353 is_written = true;
354 }
355
356 void MarkRead() {
357 is_read = true;
358 }
359
360 void MarkAtomic() {
361 MarkWrite();
362 MarkRead();
363 is_atomic = true;
364 }
365
366 u32 index = 0;
367 u32 offset = 0;
368 u32 buffer = 0;
369
370 Tegra::Shader::ImageType type{};
371 bool is_bindless = false;
372 bool is_written = false;
373 bool is_read = false;
374 bool is_atomic = false;
375};
376
377struct GlobalMemoryBase {
378 u32 cbuf_index = 0;
379 u32 cbuf_offset = 0;
380
381 [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
382 return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
383 }
384};
385
386/// Parameters describing an arithmetic operation
387struct MetaArithmetic {
388 bool precise{}; ///< Whether the operation can be constrained or not
389};
390
391/// Parameters describing a texture sampler
392struct MetaTexture {
393 SamplerEntry sampler;
394 Node array;
395 Node depth_compare;
396 std::vector<Node> aoffi;
397 std::vector<Node> ptp;
398 std::vector<Node> derivates;
399 Node bias;
400 Node lod;
401 Node component;
402 u32 element{};
403 Node index;
404};
405
406struct MetaImage {
407 const ImageEntry& image;
408 std::vector<Node> values;
409 u32 element{};
410};
411
412/// Parameters that modify an operation but are not part of any particular operand
413using Meta =
414 std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
415
416class AmendNode {
417public:
418 [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
419 if (amend_index == amend_null_index) {
420 return std::nullopt;
421 }
422 return {amend_index};
423 }
424
425 void SetAmendIndex(std::size_t index) {
426 amend_index = index;
427 }
428
429 void ClearAmend() {
430 amend_index = amend_null_index;
431 }
432
433private:
434 static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
435 std::size_t amend_index{amend_null_index};
436};
437
438/// Holds any kind of operation that can be done in the IR
439class OperationNode final : public AmendNode {
440public:
441 explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
442
443 explicit OperationNode(OperationCode code_, Meta meta_)
444 : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
445
446 explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
447 : OperationNode(code_, Meta{}, std::move(operands_)) {}
448
449 explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
450 : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
451
452 template <typename... Args>
453 explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
454 : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
455
456 [[nodiscard]] OperationCode GetCode() const {
457 return code;
458 }
459
460 [[nodiscard]] const Meta& GetMeta() const {
461 return meta;
462 }
463
464 [[nodiscard]] std::size_t GetOperandsCount() const {
465 return operands.size();
466 }
467
468 [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
469 return operands.at(operand_index);
470 }
471
472private:
473 OperationCode code{};
474 Meta meta{};
475 std::vector<Node> operands;
476};
477
478/// Encloses conditionally-executed code guarded by a condition node that evaluates to a boolean
479class ConditionalNode final : public AmendNode {
480public:
481 explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
482 : condition{std::move(condition_)}, code{std::move(code_)} {}
483
484 [[nodiscard]] const Node& GetCondition() const {
485 return condition;
486 }
487
488 [[nodiscard]] const std::vector<Node>& GetCode() const {
489 return code;
490 }
491
492private:
493 Node condition; ///< Condition to be satisfied
494 std::vector<Node> code; ///< Code to execute
495};
496
497/// A general purpose register
498class GprNode final {
499public:
500 explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
501
502 [[nodiscard]] constexpr u32 GetIndex() const {
503 return static_cast<u32>(index);
504 }
505
506private:
507 Tegra::Shader::Register index{};
508};
509
510/// A custom variable
511class CustomVarNode final {
512public:
513 explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
514
515 [[nodiscard]] constexpr u32 GetIndex() const {
516 return index;
517 }
518
519private:
520 u32 index{};
521};
522
523/// A 32-bit immediate value
524class ImmediateNode final {
525public:
526 explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
527
528 [[nodiscard]] constexpr u32 GetValue() const {
529 return value;
530 }
531
532private:
533 u32 value{};
534};
535
536/// One of Maxwell's internal flags
537class InternalFlagNode final {
538public:
539 explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
540
541 [[nodiscard]] constexpr InternalFlag GetFlag() const {
542 return flag;
543 }
544
545private:
546 InternalFlag flag{};
547};
548
549/// A predicate register; it can be negated without additional nodes
550class PredicateNode final {
551public:
552 explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
553 : index{index_}, negated{negated_} {}
554
555 [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
556 return index;
557 }
558
559 [[nodiscard]] constexpr bool IsNegated() const {
560 return negated;
561 }
562
563private:
564 Tegra::Shader::Pred index{};
565 bool negated{};
566};
567
568/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
569class AbufNode final {
570public:
571 // Initialize for standard attributes (index is explicit).
572 explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
573 : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
574
575 // Initialize for physical attributes (index is a variable value).
576 explicit AbufNode(Node physical_address_, Node buffer_ = {})
577 : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
578
579 [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
580 return index;
581 }
582
583 [[nodiscard]] u32 GetElement() const {
584 return element;
585 }
586
587 [[nodiscard]] const Node& GetBuffer() const {
588 return buffer;
589 }
590
591 [[nodiscard]] bool IsPhysicalBuffer() const {
592 return static_cast<bool>(physical_address);
593 }
594
595 [[nodiscard]] const Node& GetPhysicalAddress() const {
596 return physical_address;
597 }
598
599private:
600 Node physical_address;
601 Node buffer;
602 Tegra::Shader::Attribute::Index index{};
603 u32 element{};
604};
605
606/// Patch memory (used to communicate between tessellation stages).
607class PatchNode final {
608public:
609 explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
610
611 [[nodiscard]] constexpr u32 GetOffset() const {
612 return offset;
613 }
614
615private:
616 u32 offset{};
617};
618
619/// Constant buffer node, usually mapped to uniform buffers in GLSL
620class CbufNode final {
621public:
622 explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
623
624 [[nodiscard]] u32 GetIndex() const {
625 return index;
626 }
627
628 [[nodiscard]] const Node& GetOffset() const {
629 return offset;
630 }
631
632private:
633 u32 index{};
634 Node offset;
635};
636
637/// Local memory node
638class LmemNode final {
639public:
640 explicit LmemNode(Node address_) : address{std::move(address_)} {}
641
642 [[nodiscard]] const Node& GetAddress() const {
643 return address;
644 }
645
646private:
647 Node address;
648};
649
650/// Shared memory node
651class SmemNode final {
652public:
653 explicit SmemNode(Node address_) : address{std::move(address_)} {}
654
655 [[nodiscard]] const Node& GetAddress() const {
656 return address;
657 }
658
659private:
660 Node address;
661};
662
663/// Global memory node
664class GmemNode final {
665public:
666 explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
667 : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
668 descriptor{descriptor_} {}
669
670 [[nodiscard]] const Node& GetRealAddress() const {
671 return real_address;
672 }
673
674 [[nodiscard]] const Node& GetBaseAddress() const {
675 return base_address;
676 }
677
678 [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
679 return descriptor;
680 }
681
682private:
683 Node real_address;
684 Node base_address;
685 GlobalMemoryBase descriptor;
686};
687
688/// Commentary node; it can be dropped
689class CommentNode final {
690public:
691 explicit CommentNode(std::string text_) : text{std::move(text_)} {}
692
693 [[nodiscard]] const std::string& GetText() const {
694 return text;
695 }
696
697private:
698 std::string text;
699};
700
701} // namespace VideoCommon::Shader
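
Note for readers of the deleted node.h above: every concrete node class is handled through Node, a std::shared_ptr to a std::variant of the node types, and built via MakeNode (declared in node_helper.h further down). The following is a rough, self-contained sketch of that shared_ptr-to-variant pattern using simplified stand-in types (ImmediateNode/OperationNode/OpCode here are illustrative, not the actual yuzu classes):

#include <cstdint>
#include <cstdio>
#include <memory>
#include <utility>
#include <variant>
#include <vector>

enum class OpCode { IAdd, IMul };

struct ImmediateNode; // 32-bit constant
struct OperationNode; // operation with operands

// The alias is declared with forward-declared alternatives, as in node.h.
using NodeData = std::variant<ImmediateNode, OperationNode>;
using Node = std::shared_ptr<NodeData>;

struct ImmediateNode {
    std::uint32_t value;
};

struct OperationNode {
    OpCode code;
    std::vector<Node> operands;
};

// Same shape as MakeNode in node_helper.h: construct T and wrap it in the variant.
template <typename T, typename... Args>
Node MakeNode(Args&&... args) {
    return std::make_shared<NodeData>(T{std::forward<Args>(args)...});
}

// Walks the tiny IR recursively, the way a backend would visit the real nodes.
std::uint32_t Evaluate(const Node& node) {
    if (const auto* imm = std::get_if<ImmediateNode>(node.get())) {
        return imm->value;
    }
    const auto& op = std::get<OperationNode>(*node);
    const std::uint32_t a = Evaluate(op.operands[0]);
    const std::uint32_t b = Evaluate(op.operands[1]);
    return op.code == OpCode::IAdd ? a + b : a * b;
}

int main() {
    // 3 * (4 + 5)
    const Node tree = MakeNode<OperationNode>(
        OpCode::IMul,
        std::vector<Node>{MakeNode<ImmediateNode>(3u),
                          MakeNode<OperationNode>(
                              OpCode::IAdd, std::vector<Node>{MakeNode<ImmediateNode>(4u),
                                                              MakeNode<ImmediateNode>(5u)})});
    std::printf("%u\n", Evaluate(tree)); // prints 27
}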
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
deleted file mode 100644
index 6a5b6940d..000000000
--- a/src/video_core/shader/node_helper.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <vector>
7
8#include "common/common_types.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16}
17
18Node Comment(std::string text) {
19 return MakeNode<CommentNode>(std::move(text));
20}
21
22Node Immediate(u32 value) {
23 return MakeNode<ImmediateNode>(value);
24}
25
26Node Immediate(s32 value) {
27 return Immediate(static_cast<u32>(value));
28}
29
30Node Immediate(f32 value) {
31 u32 integral;
32 std::memcpy(&integral, &value, sizeof(u32));
33 return Immediate(integral);
34}
35
36OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
37 if (is_signed) {
38 return operation_code;
39 }
40 switch (operation_code) {
41 case OperationCode::FCastInteger:
42 return OperationCode::FCastUInteger;
43 case OperationCode::IAdd:
44 return OperationCode::UAdd;
45 case OperationCode::IMul:
46 return OperationCode::UMul;
47 case OperationCode::IDiv:
48 return OperationCode::UDiv;
49 case OperationCode::IMin:
50 return OperationCode::UMin;
51 case OperationCode::IMax:
52 return OperationCode::UMax;
53 case OperationCode::ICastFloat:
54 return OperationCode::UCastFloat;
55 case OperationCode::ICastUnsigned:
56 return OperationCode::UCastSigned;
57 case OperationCode::ILogicalShiftLeft:
58 return OperationCode::ULogicalShiftLeft;
59 case OperationCode::ILogicalShiftRight:
60 return OperationCode::ULogicalShiftRight;
61 case OperationCode::IArithmeticShiftRight:
62 return OperationCode::UArithmeticShiftRight;
63 case OperationCode::IBitwiseAnd:
64 return OperationCode::UBitwiseAnd;
65 case OperationCode::IBitwiseOr:
66 return OperationCode::UBitwiseOr;
67 case OperationCode::IBitwiseXor:
68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
73 case OperationCode::IBitfieldInsert:
74 return OperationCode::UBitfieldInsert;
75 case OperationCode::IBitCount:
76 return OperationCode::UBitCount;
77 case OperationCode::LogicalILessThan:
78 return OperationCode::LogicalULessThan;
79 case OperationCode::LogicalIEqual:
80 return OperationCode::LogicalUEqual;
81 case OperationCode::LogicalILessEqual:
82 return OperationCode::LogicalULessEqual;
83 case OperationCode::LogicalIGreaterThan:
84 return OperationCode::LogicalUGreaterThan;
85 case OperationCode::LogicalINotEqual:
86 return OperationCode::LogicalUNotEqual;
87 case OperationCode::LogicalIGreaterEqual:
88 return OperationCode::LogicalUGreaterEqual;
89 case OperationCode::AtomicIExchange:
90 return OperationCode::AtomicUExchange;
91 case OperationCode::AtomicIAdd:
92 return OperationCode::AtomicUAdd;
93 case OperationCode::AtomicIMin:
94 return OperationCode::AtomicUMin;
95 case OperationCode::AtomicIMax:
96 return OperationCode::AtomicUMax;
97 case OperationCode::AtomicIAnd:
98 return OperationCode::AtomicUAnd;
99 case OperationCode::AtomicIOr:
100 return OperationCode::AtomicUOr;
101 case OperationCode::AtomicIXor:
102 return OperationCode::AtomicUXor;
103 case OperationCode::INegate:
104 UNREACHABLE_MSG("Can't negate an unsigned integer");
105 return {};
106 case OperationCode::IAbsolute:
107 UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
108 return {};
109 default:
110 UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
111 return {};
112 }
113}
114
115} // namespace VideoCommon::Shader
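
The SignedToUnsignedCode switch above exists so decoders can be written once and parameterised on is_signed: signed opcodes are rewritten to their unsigned counterparts on demand. A tiny standalone illustration of the same mapping idea (the Op enum here is hypothetical, not the yuzu OperationCode):

#include <cassert>

enum class Op { IAdd, UAdd, IMul, UMul };

// Maps a signed opcode to its unsigned counterpart, mirroring SignedToUnsignedCode.
constexpr Op SignedToUnsigned(Op op, bool is_signed) {
    if (is_signed) {
        return op;
    }
    switch (op) {
    case Op::IAdd:
        return Op::UAdd;
    case Op::IMul:
        return Op::UMul;
    default:
        return op;
    }
}

static_assert(SignedToUnsigned(Op::IAdd, false) == Op::UAdd);
static_assert(SignedToUnsigned(Op::IMul, true) == Op::IMul);

int main() {
    assert(SignedToUnsigned(Op::IMul, false) == Op::UMul);
}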
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <string>
9#include <tuple>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/common_types.h"
15#include "video_core/shader/node.h"
16
17namespace VideoCommon::Shader {
18
19/// This arithmetic operation cannot be relaxed (it must remain precise)
20inline constexpr MetaArithmetic PRECISE = {true};
21/// This arithmetic operation can be optimized away
22inline constexpr MetaArithmetic NO_PRECISE = {false};
23
24/// Creates a conditional node
25Node Conditional(Node condition, std::vector<Node> code);
26
27/// Creates a commentary node
28Node Comment(std::string text);
29
31/// Creates a u32 immediate
31Node Immediate(u32 value);
32
33/// Creates a s32 immediate
34Node Immediate(s32 value);
35
36/// Creates a f32 immediate
37Node Immediate(f32 value);
38
40/// Converts a signed operation code to an unsigned operation code
40OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
41
42template <typename T, typename... Args>
43Node MakeNode(Args&&... args) {
44 static_assert(std::is_convertible_v<T, NodeData>);
45 return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
46}
47
48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
52}
53
54template <typename... Args>
55Node Operation(OperationCode code, Args&&... args) {
56 if constexpr (sizeof...(args) == 0) {
57 return MakeNode<OperationNode>(code);
58 } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
59 Meta>) {
60 return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
61 } else {
62 return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
63 }
64}
65
66template <typename... Args>
67Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
68 return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
69}
70
71} // namespace VideoCommon::Shader
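
The Operation() template above lets callers optionally pass a Meta as the first argument: an if constexpr chain inspects the first parameter type and prepends a default Meta when none was given. A minimal self-contained sketch of that dispatch technique (Meta and Emit here are stand-ins, not the yuzu API):

#include <cstdio>
#include <tuple>
#include <type_traits>
#include <utility>

struct Meta {
    bool precise = false;
};

void Emit(Meta meta, int lhs, int rhs) {
    std::printf("emit precise=%d operands=%d,%d\n", meta.precise, lhs, rhs);
}

template <typename... Args>
void Operation(Args&&... args) {
    if constexpr (sizeof...(Args) == 0) {
        Emit(Meta{}, 0, 0);
    } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
                                               Meta>) {
        // First argument already is (or converts to) a Meta: forward as-is.
        Emit(std::forward<Args>(args)...);
    } else {
        // No Meta supplied: prepend a default one, like Operation() above.
        Emit(Meta{}, std::forward<Args>(args)...);
    }
}

int main() {
    Operation(1, 2);             // default Meta
    Operation(Meta{true}, 3, 4); // explicit Meta
}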
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27
28 auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
29
30 return {
31 .tfb_layouts = graphics.regs.tfb_layouts,
32 .tfb_varying_locs = graphics.regs.tfb_varying_locs,
33 .primitive_topology = graphics.regs.draw.topology,
34 .tessellation_primitive = graphics.regs.tess_mode.prim,
35 .tessellation_spacing = graphics.regs.tess_mode.spacing,
36 .tfb_enabled = graphics.regs.tfb_enabled != 0,
37 .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
38 };
39}
40
41ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
42 if (shader_stage != ShaderType::Compute) {
43 return {};
44 }
45
46 auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
47 const auto& launch = compute.launch_description;
48
49 return {
50 .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
51 .shared_memory_size_in_words = launch.shared_alloc,
52 .local_memory_size_in_words = launch.local_pos_alloc,
53 };
54}
55
56} // Anonymous namespace
57
58Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
59 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
60 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
61
62Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
63 : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
64 graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
65 shader_stage, engine_)} {}
66
67Registry::~Registry() = default;
68
69std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
70 const std::pair<u32, u32> key = {buffer, offset};
71 const auto iter = keys.find(key);
72 if (iter != keys.end()) {
73 return iter->second;
74 }
75 if (!engine) {
76 return std::nullopt;
77 }
78 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
79 keys.emplace(key, value);
80 return value;
81}
82
83std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
84 const u32 key = offset;
85 const auto iter = bound_samplers.find(key);
86 if (iter != bound_samplers.end()) {
87 return iter->second;
88 }
89 if (!engine) {
90 return std::nullopt;
91 }
92 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
93 bound_samplers.emplace(key, value);
94 return value;
95}
96
97std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
98 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
99 SeparateSamplerKey key;
100 key.buffers = buffers;
101 key.offsets = offsets;
102 const auto iter = separate_samplers.find(key);
103 if (iter != separate_samplers.end()) {
104 return iter->second;
105 }
106 if (!engine) {
107 return std::nullopt;
108 }
109
110 const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
111 const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
112 const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
113 separate_samplers.emplace(key, value);
114 return value;
115}
116
117std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
118 const std::pair key = {buffer, offset};
119 const auto iter = bindless_samplers.find(key);
120 if (iter != bindless_samplers.end()) {
121 return iter->second;
122 }
123 if (!engine) {
124 return std::nullopt;
125 }
126 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
127 bindless_samplers.emplace(key, value);
128 return value;
129}
130
131void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
132 keys.insert_or_assign({buffer, offset}, value);
133}
134
135void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
136 bound_samplers.insert_or_assign(offset, sampler);
137}
138
139void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
140 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
141}
142
143bool Registry::IsConsistent() const {
144 if (!engine) {
145 return true;
146 }
147 return std::all_of(keys.begin(), keys.end(),
148 [this](const auto& pair) {
149 const auto [cbuf, offset] = pair.first;
150 const auto value = pair.second;
151 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
152 }) &&
153 std::all_of(bound_samplers.begin(), bound_samplers.end(),
154 [this](const auto& sampler) {
155 const auto [key, value] = sampler;
156 return value == engine->AccessBoundSampler(stage, key);
157 }) &&
158 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
159 [this](const auto& sampler) {
160 const auto [cbuf, offset] = sampler.first;
161 const auto value = sampler.second;
162 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
163 });
164}
165
166bool Registry::HasEqualKeys(const Registry& rhs) const {
167 return std::tie(keys, bound_samplers, bindless_samplers) ==
168 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
169}
170
171const GraphicsInfo& Registry::GetGraphicsInfo() const {
172 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
173 return graphics_info;
174}
175
176const ComputeInfo& Registry::GetComputeInfo() const {
177 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
178 return compute_info;
179}
180
181} // namespace VideoCommon::Shader
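
ObtainKey, ObtainBoundSampler and ObtainBindlessSampler above all follow the same memoisation pattern: look the key up in a local map, and only fall back to (and cache) a live engine query when the registry was built against an engine; a registry deserialised from disk answers only from its stored keys. A self-contained sketch of that pattern, where FetchFromEngine is a hypothetical stand-in for AccessConstBuffer32 and PairHash plays the role of Common::PairHash:

#include <cstdint>
#include <cstdio>
#include <functional>
#include <optional>
#include <unordered_map>
#include <utility>

struct PairHash {
    std::size_t operator()(const std::pair<std::uint32_t, std::uint32_t>& p) const noexcept {
        return std::hash<std::uint64_t>{}((std::uint64_t{p.first} << 32) | p.second);
    }
};

class KeyCache {
public:
    // engine_available=false models a registry restored from disk.
    explicit KeyCache(bool engine_available) : has_engine{engine_available} {}

    std::optional<std::uint32_t> ObtainKey(std::uint32_t buffer, std::uint32_t offset) {
        const std::pair key{buffer, offset};
        if (const auto it = keys.find(key); it != keys.end()) {
            return it->second; // cached
        }
        if (!has_engine) {
            return std::nullopt; // no live GPU state to ask
        }
        const std::uint32_t value = FetchFromEngine(buffer, offset); // hypothetical query
        keys.emplace(key, value);
        return value;
    }

private:
    static std::uint32_t FetchFromEngine(std::uint32_t buffer, std::uint32_t offset) {
        return buffer * 1000 + offset; // stand-in for a const buffer read
    }

    bool has_engine;
    std::unordered_map<std::pair<std::uint32_t, std::uint32_t>, std::uint32_t, PairHash> keys;
};

int main() {
    KeyCache live{true};
    std::printf("%u\n", *live.ObtainKey(3, 8)); // queries and caches: 3008
    std::printf("%u\n", *live.ObtainKey(3, 8)); // served from the cache

    KeyCache serialized{false};
    std::printf("%d\n", serialized.ObtainKey(3, 8).has_value()); // 0
}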
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
deleted file mode 100644
index 4bebefdde..000000000
--- a/src/video_core/shader/registry.h
+++ /dev/null
@@ -1,172 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22struct SeparateSamplerKey {
23 std::pair<u32, u32> buffers;
24 std::pair<u32, u32> offsets;
25};
26
27} // namespace VideoCommon::Shader
28
29namespace std {
30
31template <>
32struct hash<VideoCommon::Shader::SeparateSamplerKey> {
33 std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
34 return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
35 key.offsets.second);
36 }
37};
38
39template <>
40struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
41 bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
42 const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
43 return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
44 }
45};
46
47} // namespace std
48
49namespace VideoCommon::Shader {
50
51using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
52using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
53using SeparateSamplerMap =
54 std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
55using BindlessSamplerMap =
56 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
57
58struct GraphicsInfo {
59 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
60
61 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
62 tfb_layouts{};
63 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
64 Maxwell::PrimitiveTopology primitive_topology{};
65 Maxwell::TessellationPrimitive tessellation_primitive{};
66 Maxwell::TessellationSpacing tessellation_spacing{};
67 bool tfb_enabled = false;
68 bool tessellation_clockwise = false;
69};
70static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
71 std::is_standard_layout_v<GraphicsInfo>);
72
73struct ComputeInfo {
74 std::array<u32, 3> workgroup_size{};
75 u32 shared_memory_size_in_words = 0;
76 u32 local_memory_size_in_words = 0;
77};
78static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
79
80struct SerializedRegistryInfo {
81 VideoCore::GuestDriverProfile guest_driver_profile;
82 u32 bound_buffer = 0;
83 GraphicsInfo graphics;
84 ComputeInfo compute;
85};
86
87/**
88 * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
89 * With it, the shader can obtain required data from GPU state and store it for disk shader
90 * compilation.
91 */
92class Registry {
93public:
94 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
95
96 explicit Registry(Tegra::Engines::ShaderType shader_stage,
97 Tegra::Engines::ConstBufferEngineInterface& engine_);
98
99 ~Registry();
100
101 /// Retrieves a key from the registry. If the key is already registered, returns the stored
102 /// value; otherwise it is obtained from Maxwell3D and registered.
103 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
104
105 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
106
107 std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
108 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
109
110 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
111
112 /// Inserts a key.
113 void InsertKey(u32 buffer, u32 offset, u32 value);
114
115 /// Inserts a bound sampler key.
116 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
117
118 /// Inserts a bindless sampler key.
119 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
120
121 /// Checks keys and samplers against the engine's current const buffers.
122 /// Returns true if they are the same value, false otherwise.
123 bool IsConsistent() const;
124
125 /// Returns true if the keys are equal to the other ones in the registry.
126 bool HasEqualKeys(const Registry& rhs) const;
127
128 /// Returns graphics information from this shader
129 const GraphicsInfo& GetGraphicsInfo() const;
130
131 /// Returns compute information from this shader
132 const ComputeInfo& GetComputeInfo() const;
133
134 /// Returns the const buffer keys in the database.
135 const KeyMap& GetKeys() const {
136 return keys;
137 }
138
139 /// Gets the bound samplers database.
140 const BoundSamplerMap& GetBoundSamplers() const {
141 return bound_samplers;
142 }
143
144 /// Gets bindless samplers database.
145 const BindlessSamplerMap& GetBindlessSamplers() const {
146 return bindless_samplers;
147 }
148
149 /// Gets the bound buffer used by this shader
150 u32 GetBoundBuffer() const {
151 return bound_buffer;
152 }
153
154 /// Obtains access to the guest driver's profile.
155 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
156 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
157 }
158
159private:
160 const Tegra::Engines::ShaderType stage;
161 VideoCore::GuestDriverProfile stored_guest_driver_profile;
162 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
163 KeyMap keys;
164 BoundSamplerMap bound_samplers;
165 SeparateSamplerMap separate_samplers;
166 BindlessSamplerMap bindless_samplers;
167 u32 bound_buffer;
168 GraphicsInfo graphics_info;
169 ComputeInfo compute_info;
170};
171
172} // namespace VideoCommon::Shader
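
Because SeparateSamplerKey is a plain struct of two pairs, registry.h has to provide std::hash and std::equal_to specialisations before SeparateSamplerMap can use it as an unordered_map key; the XOR combine above is intentionally cheap, trading hash quality for simplicity. A standalone sketch of the same technique with a simplified key type (SamplerKey here is illustrative, not the yuzu struct):

#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <utility>

// Simplified stand-in for SeparateSamplerKey: two (buffer, offset) pairs.
struct SamplerKey {
    std::pair<std::uint32_t, std::uint32_t> buffers;
    std::pair<std::uint32_t, std::uint32_t> offsets;
};

namespace std {
template <>
struct hash<SamplerKey> {
    std::size_t operator()(const SamplerKey& key) const noexcept {
        // Same cheap XOR combine as the specialisation above.
        return std::hash<std::uint32_t>{}(key.buffers.first ^ key.buffers.second ^
                                          key.offsets.first ^ key.offsets.second);
    }
};

template <>
struct equal_to<SamplerKey> {
    bool operator()(const SamplerKey& lhs, const SamplerKey& rhs) const noexcept {
        return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
    }
};
} // namespace std

int main() {
    std::unordered_map<SamplerKey, int> descriptors;
    descriptors[SamplerKey{{1, 2}, {0x10, 0x20}}] = 42;
    std::printf("%d\n", descriptors[SamplerKey{{1, 2}, {0x10, 0x20}}]); // 42
}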
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
deleted file mode 100644
index a4987ffc6..000000000
--- a/src/video_core/shader/shader_ir.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cmath>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Attribute;
21using Tegra::Shader::Instruction;
22using Tegra::Shader::IpaMode;
23using Tegra::Shader::Pred;
24using Tegra::Shader::PredCondition;
25using Tegra::Shader::PredOperation;
26using Tegra::Shader::Register;
27
28ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
29 Registry& registry_)
30 : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
31 registry_} {
32 Decode();
33 PostDecode();
34}
35
36ShaderIR::~ShaderIR() = default;
37
38Node ShaderIR::GetRegister(Register reg) {
39 if (reg != Register::ZeroIndex) {
40 used_registers.insert(static_cast<u32>(reg));
41 }
42 return MakeNode<GprNode>(reg);
43}
44
45Node ShaderIR::GetCustomVariable(u32 id) {
46 return MakeNode<CustomVarNode>(id);
47}
48
49Node ShaderIR::GetImmediate19(Instruction instr) {
50 return Immediate(instr.alu.GetImm20_19());
51}
52
53Node ShaderIR::GetImmediate32(Instruction instr) {
54 return Immediate(instr.alu.GetImm20_32());
55}
56
57Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
58 const auto index = static_cast<u32>(index_);
59 const auto offset = static_cast<u32>(offset_);
60
61 used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset);
62
63 return MakeNode<CbufNode>(index, Immediate(offset));
64}
65
66Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
67 const auto index = static_cast<u32>(index_);
68 const auto offset = static_cast<u32>(offset_);
69
70 used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect();
71
72 Node final_offset = [&] {
73 // Attempt to inline constant buffer without a variable offset. This is done to allow
74 // tracking LDC calls.
75 if (const auto gpr = std::get_if<GprNode>(&*node)) {
76 if (gpr->GetIndex() == Register::ZeroIndex) {
77 return Immediate(offset);
78 }
79 }
80 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
81 }();
82 return MakeNode<CbufNode>(index, std::move(final_offset));
83}
84
85Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
86 const auto pred = static_cast<Pred>(pred_);
87 if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
88 used_predicates.insert(pred);
89 }
90
91 return MakeNode<PredicateNode>(pred, negated);
92}
93
94Node ShaderIR::GetPredicate(bool immediate) {
95 return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
96}
97
98Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
99 MarkAttributeUsage(index, element);
100 used_input_attributes.emplace(index);
101 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
102}
103
104Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
105 uses_physical_attributes = true;
106 return MakeNode<AbufNode>(GetRegister(physical_address), buffer);
107}
108
109Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
110 MarkAttributeUsage(index, element);
111 used_output_attributes.insert(index);
112 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
113}
114
115Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
116 Node node = MakeNode<InternalFlagNode>(flag);
117 if (negated) {
118 return Operation(OperationCode::LogicalNegate, std::move(node));
119 }
120 return node;
121}
122
123Node ShaderIR::GetLocalMemory(Node address) {
124 return MakeNode<LmemNode>(std::move(address));
125}
126
127Node ShaderIR::GetSharedMemory(Node address) {
128 return MakeNode<SmemNode>(std::move(address));
129}
130
131Node ShaderIR::GetTemporary(u32 id) {
132 return GetRegister(Register::ZeroIndex + 1 + id);
133}
134
135Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
136 if (absolute) {
137 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
138 }
139 if (negate) {
140 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
141 }
142 return value;
143}
144
145Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
146 if (!saturate) {
147 return value;
148 }
149
150 Node positive_zero = Immediate(std::copysignf(0, 1));
151 Node positive_one = Immediate(1.0f);
152 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
153 std::move(positive_one));
154}
155
156Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
157 switch (size) {
158 case Register::Size::Byte:
159 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
160 std::move(value), Immediate(24));
161 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
162 std::move(value), Immediate(24));
163 return value;
164 case Register::Size::Short:
165 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
166 std::move(value), Immediate(16));
167 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
168 std::move(value), Immediate(16));
169 return value;
170 case Register::Size::Word:
171 // Default - do nothing
172 return value;
173 default:
174 UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
175 return value;
176 }
177}
178
179Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
180 if (!is_signed) {
181 // Absolute or negate on an unsigned is pointless
182 return value;
183 }
184 if (absolute) {
185 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
186 }
187 if (negate) {
188 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
189 }
190 return value;
191}
192
193Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
194 Node value = Immediate(instr.half_imm.PackImmediates());
195 if (!has_negation) {
196 return value;
197 }
198
199 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
200 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
201
202 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
203 std::move(second_negate));
204}
205
206Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
207 return Operation(OperationCode::HUnpack, type, std::move(value));
208}
209
210Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
211 switch (merge) {
212 case Tegra::Shader::HalfMerge::H0_H1:
213 return src;
214 case Tegra::Shader::HalfMerge::F32:
215 return Operation(OperationCode::HMergeF32, std::move(src));
216 case Tegra::Shader::HalfMerge::Mrg_H0:
217 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
218 case Tegra::Shader::HalfMerge::Mrg_H1:
219 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
220 }
221 UNREACHABLE();
222 return src;
223}
224
225Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
226 if (absolute) {
227 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
228 }
229 if (negate) {
230 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
231 GetPredicate(true));
232 }
233 return value;
234}
235
236Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
237 if (!saturate) {
238 return value;
239 }
240
241 Node positive_zero = Immediate(std::copysignf(0, 1));
242 Node positive_one = Immediate(1.0f);
243 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
244 std::move(positive_one));
245}
246
247Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
248 if (condition == PredCondition::T) {
249 return GetPredicate(true);
250 } else if (condition == PredCondition::F) {
251 return GetPredicate(false);
252 }
253
254 static constexpr std::array comparison_table{
255 OperationCode(0),
256 OperationCode::LogicalFOrdLessThan, // LT
257 OperationCode::LogicalFOrdEqual, // EQ
258 OperationCode::LogicalFOrdLessEqual, // LE
259 OperationCode::LogicalFOrdGreaterThan, // GT
260 OperationCode::LogicalFOrdNotEqual, // NE
261 OperationCode::LogicalFOrdGreaterEqual, // GE
262 OperationCode::LogicalFOrdered, // NUM
263 OperationCode::LogicalFUnordered, // NAN
264 OperationCode::LogicalFUnordLessThan, // LTU
265 OperationCode::LogicalFUnordEqual, // EQU
266 OperationCode::LogicalFUnordLessEqual, // LEU
267 OperationCode::LogicalFUnordGreaterThan, // GTU
268 OperationCode::LogicalFUnordNotEqual, // NEU
269 OperationCode::LogicalFUnordGreaterEqual, // GEU
270 };
271 const std::size_t index = static_cast<std::size_t>(condition);
272 ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
273
274 return Operation(comparison_table[index], op_a, op_b);
275}
276
277Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
278 Node op_b) {
279 static constexpr std::array comparison_table{
280 std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
281 std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
282 std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
283 std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
284 std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
285 std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
286 };
287
288 const auto comparison =
289 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
290 [condition](const auto entry) { return condition == entry.first; });
291 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
292 "Unknown predicate comparison operation");
293
294 return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
295 std::move(op_b));
296}
297
298Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
299 Node op_b) {
300 static constexpr std::array comparison_table{
301 std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
302 std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
303 std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
304 std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
305 std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
306 std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
307 std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
308 std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
309 std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
310 std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
311 std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
312 };
313
314 const auto comparison =
315 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
316 [condition](const auto entry) { return condition == entry.first; });
317 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
318 "Unknown predicate comparison operation");
319
320 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
321}
322
323OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
324 static constexpr std::array operation_table{
325 OperationCode::LogicalAnd,
326 OperationCode::LogicalOr,
327 OperationCode::LogicalXor,
328 };
329
330 const auto index = static_cast<std::size_t>(operation);
331 if (index >= operation_table.size()) {
332 UNIMPLEMENTED_MSG("Unknown predicate operation.");
333 return {};
334 }
335
336 return operation_table[index];
337}
338
339Node ShaderIR::GetConditionCode(ConditionCode cc) const {
340 switch (cc) {
341 case ConditionCode::NEU:
342 return GetInternalFlag(InternalFlag::Zero, true);
343 case ConditionCode::FCSM_TR:
344 UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
345 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
346 default:
347 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
348 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
349 }
350}
351
352void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
353 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
354}
355
356void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
357 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
358}
359
360void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
361 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
362}
363
364void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
365 bb.push_back(
366 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
367}
368
369void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
370 bb.push_back(
371 Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
372}
373
374void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
375 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
376}
377
378void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
379 if (!sets_cc) {
380 return;
381 }
382 Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
383 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
384 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
385}
386
387void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
388 if (!sets_cc) {
389 return;
390 }
391 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
392 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
393 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
394}
395
396Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
397 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
398 Immediate(offset), Immediate(bits));
399}
400
401Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
402 return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
403 Immediate(bits));
404}
405
406void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
407 switch (index) {
408 case Attribute::Index::LayerViewportPointSize:
409 switch (element) {
410 case 0:
411 UNIMPLEMENTED();
412 break;
413 case 1:
414 uses_layer = true;
415 break;
416 case 2:
417 uses_viewport_index = true;
418 break;
419 case 3:
420 uses_point_size = true;
421 break;
422 }
423 break;
424 case Attribute::Index::TessCoordInstanceIDVertexID:
425 switch (element) {
426 case 2:
427 uses_instance_id = true;
428 break;
429 case 3:
430 uses_vertex_id = true;
431 break;
432 }
433 break;
434 case Attribute::Index::ClipDistances0123:
435 case Attribute::Index::ClipDistances4567: {
436 const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
437 used_clip_distances.at(clip_index) = true;
438 break;
439 }
440 case Attribute::Index::FrontColor:
441 case Attribute::Index::FrontSecondaryColor:
442 case Attribute::Index::BackColor:
443 case Attribute::Index::BackSecondaryColor:
444 uses_legacy_varyings = true;
445 break;
446 default:
447 if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
448 uses_legacy_varyings = true;
449 }
450 break;
451 }
452}
453
454std::size_t ShaderIR::DeclareAmend(Node new_amend) {
455 const auto id = amend_code.size();
456 amend_code.push_back(std::move(new_amend));
457 return id;
458}
459
460u32 ShaderIR::NewCustomVariable() {
461 return num_custom_variables++;
462}
463
464} // namespace VideoCommon::Shader
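
GetPredicateComparisonInteger and GetPredicateComparisonHalf above resolve a PredCondition to an OperationCode with a constexpr table of pairs and std::find_if rather than a switch, reporting UNIMPLEMENTED when the condition is unknown. A self-contained sketch of that table-driven lookup (Cond and Op are hypothetical enums standing in for the Tegra ones):

#include <algorithm>
#include <array>
#include <cassert>
#include <optional>
#include <utility>

enum class Cond { LT, EQ, LE, GT, NE, GE };
enum class Op { LessThan, Equal, LessEqual, GreaterThan, NotEqual, GreaterEqual };

// Table-driven condition -> opcode lookup, mirroring the comparison_table pattern above.
std::optional<Op> Lookup(Cond condition) {
    static constexpr std::array table{
        std::pair{Cond::LT, Op::LessThan},  std::pair{Cond::EQ, Op::Equal},
        std::pair{Cond::LE, Op::LessEqual}, std::pair{Cond::GT, Op::GreaterThan},
        std::pair{Cond::NE, Op::NotEqual},  std::pair{Cond::GE, Op::GreaterEqual},
    };
    const auto it = std::find_if(table.cbegin(), table.cend(), [condition](const auto& entry) {
        return entry.first == condition;
    });
    if (it == table.cend()) {
        return std::nullopt; // unknown condition (yuzu reports UNIMPLEMENTED here)
    }
    return it->second;
}

int main() {
    assert(Lookup(Cond::GT) == Op::GreaterThan);
    assert(Lookup(Cond::NE) == Op::NotEqual);
}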
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
deleted file mode 100644
index 1cd7c14d7..000000000
--- a/src/video_core/shader/shader_ir.h
+++ /dev/null
@@ -1,479 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <list>
9#include <map>
10#include <optional>
11#include <set>
12#include <tuple>
13#include <vector>
14
15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_bytecode.h"
18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/memory_util.h"
22#include "video_core/shader/node.h"
23#include "video_core/shader/registry.h"
24
25namespace VideoCommon::Shader {
26
27struct ShaderBlock;
28
29constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
30
31struct ConstBuffer {
32 constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
33 : max_offset{max_offset_}, is_indirect{is_indirect_} {}
34
35 constexpr ConstBuffer() = default;
36
37 void MarkAsUsed(u64 offset) {
38 max_offset = std::max(max_offset, static_cast<u32>(offset));
39 }
40
41 void MarkAsUsedIndirect() {
42 is_indirect = true;
43 }
44
45 bool IsIndirect() const {
46 return is_indirect;
47 }
48
49 u32 GetSize() const {
50 return max_offset + static_cast<u32>(sizeof(float));
51 }
52
53 u32 GetMaxOffset() const {
54 return max_offset;
55 }
56
57private:
58 u32 max_offset = 0;
59 bool is_indirect = false;
60};
61
62struct GlobalMemoryUsage {
63 bool is_read{};
64 bool is_written{};
65};
66
67class ShaderIR final {
68public:
69 explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
70 CompilerSettings settings_, Registry& registry_);
71 ~ShaderIR();
72
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
74 return basic_blocks;
75 }
76
77 const std::set<u32>& GetRegisters() const {
78 return used_registers;
79 }
80
81 const std::set<Tegra::Shader::Pred>& GetPredicates() const {
82 return used_predicates;
83 }
84
85 const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
86 return used_input_attributes;
87 }
88
89 const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
90 return used_output_attributes;
91 }
92
93 const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
94 return used_cbufs;
95 }
96
97 const std::list<SamplerEntry>& GetSamplers() const {
98 return used_samplers;
99 }
100
101 const std::list<ImageEntry>& GetImages() const {
102 return used_images;
103 }
104
105 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
106 const {
107 return used_clip_distances;
108 }
109
110 const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
111 return used_global_memory;
112 }
113
114 std::size_t GetLength() const {
115 return static_cast<std::size_t>(coverage_end * sizeof(u64));
116 }
117
118 bool UsesLayer() const {
119 return uses_layer;
120 }
121
122 bool UsesViewportIndex() const {
123 return uses_viewport_index;
124 }
125
126 bool UsesPointSize() const {
127 return uses_point_size;
128 }
129
130 bool UsesInstanceId() const {
131 return uses_instance_id;
132 }
133
134 bool UsesVertexId() const {
135 return uses_vertex_id;
136 }
137
138 bool UsesLegacyVaryings() const {
139 return uses_legacy_varyings;
140 }
141
142 bool UsesYNegate() const {
143 return uses_y_negate;
144 }
145
146 bool UsesWarps() const {
147 return uses_warps;
148 }
149
150 bool HasPhysicalAttributes() const {
151 return uses_physical_attributes;
152 }
153
154 const Tegra::Shader::Header& GetHeader() const {
155 return header;
156 }
157
158 bool IsFlowStackDisabled() const {
159 return disable_flow_stack;
160 }
161
162 bool IsDecompiled() const {
163 return decompiled;
164 }
165
166 const ASTManager& GetASTManager() const {
167 return program_manager;
168 }
169
170 ASTNode GetASTProgram() const {
171 return program_manager.GetProgram();
172 }
173
174 u32 GetASTNumVariables() const {
175 return program_manager.GetVariables();
176 }
177
178 u32 ConvertAddressToNvidiaSpace(u32 address) const {
179 return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
180 }
181
182 /// Returns a condition code evaluated from internal flags
183 Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
184
185 const Node& GetAmendNode(std::size_t index) const {
186 return amend_code[index];
187 }
188
189 u32 GetNumCustomVariables() const {
190 return num_custom_variables;
191 }
192
193private:
194 friend class ASTDecoder;
195
196 struct SamplerInfo {
197 std::optional<Tegra::Shader::TextureType> type;
198 std::optional<bool> is_array;
199 std::optional<bool> is_shadow;
200 std::optional<bool> is_buffer;
201
202 constexpr bool IsComplete() const noexcept {
203 return type && is_array && is_shadow && is_buffer;
204 }
205 };
206
207 void Decode();
208 void PostDecode();
209
210 NodeBlock DecodeRange(u32 begin, u32 end);
211 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
212 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
213
214 /**
215 * Decodes a single instruction from Tegra to IR.
216 * @param bb Basic block where the nodes will be written to.
217 * @param pc Program counter. Offset to decode.
218 * @return Next address to decode.
219 */
220 u32 DecodeInstr(NodeBlock& bb, u32 pc);
221
222 u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
223 u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
224 u32 DecodeBfe(NodeBlock& bb, u32 pc);
225 u32 DecodeBfi(NodeBlock& bb, u32 pc);
226 u32 DecodeShift(NodeBlock& bb, u32 pc);
227 u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
228 u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
229 u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
230 u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
231 u32 DecodeFfma(NodeBlock& bb, u32 pc);
232 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
233 u32 DecodeConversion(NodeBlock& bb, u32 pc);
234 u32 DecodeWarp(NodeBlock& bb, u32 pc);
235 u32 DecodeMemory(NodeBlock& bb, u32 pc);
236 u32 DecodeTexture(NodeBlock& bb, u32 pc);
237 u32 DecodeImage(NodeBlock& bb, u32 pc);
238 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
239 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
240 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
241 u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
242 u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
243 u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
244 u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
245 u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
246 u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
247 u32 DecodeVideo(NodeBlock& bb, u32 pc);
248 u32 DecodeXmad(NodeBlock& bb, u32 pc);
249 u32 DecodeOther(NodeBlock& bb, u32 pc);
250
251 /// Generates a node for a passed register.
252 Node GetRegister(Tegra::Shader::Register reg);
253 /// Generates a node for a custom variable
254 Node GetCustomVariable(u32 id);
255 /// Generates a node representing a 19-bit immediate value
256 Node GetImmediate19(Tegra::Shader::Instruction instr);
257 /// Generates a node representing a 32-bit immediate value
258 Node GetImmediate32(Tegra::Shader::Instruction instr);
259 /// Generates a node representing a constant buffer
260 Node GetConstBuffer(u64 index, u64 offset);
261 /// Generates a node representing a constant buffer with a variable offset
262 Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
263 /// Generates a node for a passed predicate. It can be optionally negated
264 Node GetPredicate(u64 pred, bool negated = false);
265 /// Generates a predicate node for an immediate true or false value
266 Node GetPredicate(bool immediate);
267 /// Generates a node representing an input attribute. Keeps track of used attributes.
268 Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
269 /// Generates a node representing a physical input attribute.
270 Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
271 /// Generates a node representing an output attribute. Keeps track of used attributes.
272 Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
273 /// Generates a node representing an internal flag
274 Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
275 /// Generates a node representing a local memory address
276 Node GetLocalMemory(Node address);
277 /// Generates a node representing a shared memory address
278 Node GetSharedMemory(Node address);
279 /// Generates a temporary; internally it uses a post-RZ register
280 Node GetTemporary(u32 id);
281
282 /// Sets a register. src value must be a number-evaluated node.
283 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
284 /// Sets a predicate. src value must be a bool-evaluated node
285 void SetPredicate(NodeBlock& bb, u64 dest, Node src);
286 /// Sets an internal flag. src value must be a bool-evaluated node
287 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
288 /// Sets a local memory address with a value.
289 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
290 /// Sets a shared memory address with a value.
291 void SetSharedMemory(NodeBlock& bb, Node address, Node value);
292 /// Sets a temporary. Internally it uses a post-RZ register
293 void SetTemporary(NodeBlock& bb, u32 id, Node value);
294
295 /// Sets internal flags from a float
296 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
297 /// Sets internal flags from an integer
298 void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
299
300 /// Conditionally absolute/negated float. Absolute is applied first
301 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
302 /// Conditionally saturates a float
303 Node GetSaturatedFloat(Node value, bool saturate = true);
304
305 /// Converts an integer to different sizes.
306 Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
307 /// Conditionally absolute/negated integer. Absolute is applied first
308 Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
309
310 /// Unpacks a half immediate from an instruction
311 Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
312 /// Unpacks a binary value into a half float pair with a type format
313 Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
314 /// Merges a half pair into another value
315 Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
316 /// Conditionally absolute/negated half float pair. Absolute is applied first
317 Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
318 /// Conditionally saturates a half float pair
319 Node GetSaturatedHalfFloat(Node value, bool saturate = true);
320
321 /// Get image component value by type and size
322 std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
323 u32 component_size, Node original_value);
324
325 /// Returns a predicate comparing two floats
326 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
327 /// Returns a predicate comparing two integers
328 Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
329 Node op_a, Node op_b);
330 /// Returns a predicate comparing two half floats. The meta argument describes how both pairs will be compared
331 Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
332
333 /// Returns a predicate combiner operation
334 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
335
336 /// Queries the missing sampler info from the execution context.
337 SamplerInfo GetSamplerInfo(SamplerInfo info,
338 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
339
340 /// Accesses a texture sampler.
341 std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
342
343 /// Accesses a texture sampler for a bindless texture.
344 std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
345 Node& index_var);
346
347 /// Accesses an image.
348 ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
349
350 /// Access a bindless image sampler.
351 ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
352
353 /// Extracts a sequence of bits from a node
354 Node BitfieldExtract(Node value, u32 offset, u32 bits);
355
356 /// Inserts a sequence of bits from a node
357 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
358
359 /// Marks the usage of an input or output attribute.
360 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
361
362 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
363 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
364
365 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
366 const Node4& components);
367
368 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
369 const Node4& components, bool ignore_mask = false);
370 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
371 const Node4& components, bool ignore_mask = false);
372
373 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
374 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
375 bool is_array, bool is_aoffi,
376 std::optional<Tegra::Shader::Register> bindless_reg);
377
378 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
379 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
380 bool is_array);
381
382 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
383 bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
384 bool is_bindless);
385
386 Node4 GetTldCode(Tegra::Shader::Instruction instr);
387
388 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
389 bool is_array);
390
391 std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
392 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
393 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
394
395 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
396
397 std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
398
399 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
400 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
401 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
402 std::optional<Tegra::Shader::Register> bindless_reg);
403
404 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
405 u64 byte_height);
406
407 void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
408 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
409 Tegra::Shader::PredicateResultMode predicate_mode,
410 Tegra::Shader::Pred predicate, bool sets_cc);
411 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
412 Node op_c, Node imm_lut, bool sets_cc);
413
414 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
415
416 std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
417 s64 cursor);
418
419 std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
420 const OperationNode& operation,
421 Node gpr, Node base_offset,
422 Node tracked, const NodeBlock& code,
423 s64 cursor);
424
425 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
426
427 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
428 s64 cursor) const;
429
430 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
431 Tegra::Shader::Instruction instr,
432 bool is_read, bool is_write);
433
434 /// Registers new amending code and obtains its reference id.
435 std::size_t DeclareAmend(Node new_amend);
436
437 u32 NewCustomVariable();
438
439 const ProgramCode& program_code;
440 const u32 main_offset;
441 const CompilerSettings settings;
442 Registry& registry;
443
444 bool decompiled{};
445 bool disable_flow_stack{};
446
447 u32 coverage_begin{};
448 u32 coverage_end{};
449
450 std::map<u32, NodeBlock> basic_blocks;
451 NodeBlock global_code;
452 ASTManager program_manager{true, true};
453 std::vector<Node> amend_code;
454 u32 num_custom_variables{};
455
456 std::set<u32> used_registers;
457 std::set<Tegra::Shader::Pred> used_predicates;
458 std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
459 std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
460 std::map<u32, ConstBuffer> used_cbufs;
461 std::list<SamplerEntry> used_samplers;
462 std::list<ImageEntry> used_images;
463 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
464 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
465 bool uses_layer{};
466 bool uses_viewport_index{};
467 bool uses_point_size{};
468 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
469 bool uses_instance_id{};
470 bool uses_vertex_id{};
471 bool uses_legacy_varyings{};
472 bool uses_y_negate{};
473 bool uses_warps{};
474 bool uses_indexed_samplers{};
475
476 Tegra::Shader::Header header;
477};
478
479} // namespace VideoCommon::Shader
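[Editorial note, not part of the commit] The BitfieldExtract/BitfieldInsert helpers declared above follow conventional unsigned bitfield semantics. A minimal scalar sketch of what the emitted operations compute (illustrative only, not yuzu code; assumes 0 < bits < 32):

    #include <cstdint>

    // Editorial sketch: scalar analogue of the BitfieldExtract/BitfieldInsert helpers.
    constexpr uint32_t BitfieldExtract(uint32_t value, uint32_t offset, uint32_t bits) {
        return (value >> offset) & ((1u << bits) - 1u);
    }

    constexpr uint32_t BitfieldInsert(uint32_t base, uint32_t insert, uint32_t offset,
                                      uint32_t bits) {
        const uint32_t mask = ((1u << bits) - 1u) << offset;
        return (base & ~mask) | ((insert << offset) & mask);
    }

    static_assert(BitfieldExtract(0xABCD1234u, 8, 8) == 0x12u);
    static_assert(BitfieldInsert(0xFFFFFFFFu, 0x0u, 4, 8) == 0xFFFFF00Fu);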
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
deleted file mode 100644
index 6be3ea92b..000000000
--- a/src/video_core/shader/track.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <variant>
8
9#include "common/common_types.h"
10#include "video_core/shader/node.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16namespace {
17
18std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
19 OperationCode operation_code) {
20 for (; cursor >= 0; --cursor) {
21 Node node = code.at(cursor);
22
23 if (const auto operation = std::get_if<OperationNode>(&*node)) {
24 if (operation->GetCode() == operation_code) {
25 return {std::move(node), cursor};
26 }
27 }
28
29 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
30 const auto& conditional_code = conditional->GetCode();
31 auto result = FindOperation(
32 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
33 auto& found = result.first;
34 if (found) {
35 return {std::move(found), cursor};
36 }
37 }
38 }
39 return {};
40}
41
42std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
43 if (operation.GetCode() != OperationCode::UAdd) {
44 return std::nullopt;
45 }
46 Node gpr;
47 Node offset;
48 ASSERT(operation.GetOperandsCount() == 2);
49 for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
50 Node operand = operation[i];
51 if (std::holds_alternative<ImmediateNode>(*operand)) {
52 offset = operation[i];
53 } else if (std::holds_alternative<GprNode>(*operand)) {
54 gpr = operation[i];
55 }
56 }
57 if (offset && gpr) {
58 return std::make_pair(gpr, offset);
59 }
60 return std::nullopt;
61}
62
63bool AmendNodeCv(std::size_t amend_index, Node node) {
64 if (const auto operation = std::get_if<OperationNode>(&*node)) {
65 operation->SetAmendIndex(amend_index);
66 return true;
67 }
68 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
69 conditional->SetAmendIndex(amend_index);
70 return true;
71 }
72 return false;
73}
74
75} // Anonymous namespace
76
77std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
78 s64 cursor) {
79 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
80 const u32 cbuf_index = cbuf->GetIndex();
81
82 // Constant buffer found, test if it's an immediate
83 const auto& offset = cbuf->GetOffset();
84 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
85 auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
86 return {tracked, track};
87 }
88 if (const auto operation = std::get_if<OperationNode>(&*offset)) {
89 const u32 bound_buffer = registry.GetBoundBuffer();
90 if (bound_buffer != cbuf_index) {
91 return {};
92 }
93 if (const std::optional pair = DecoupleIndirectRead(*operation)) {
94 auto [gpr, base_offset] = *pair;
95 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
96 code, cursor);
97 }
98 }
99 return {};
100 }
101 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
102 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
103 return {};
104 }
105 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
106 // register that it uses as an operand
107 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
108 if (!source) {
109 return {};
110 }
111 return TrackBindlessSampler(source, code, new_cursor);
112 }
113 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
114 const OperationNode& op = *operation;
115
116 const OperationCode opcode = operation->GetCode();
117 if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
118 ASSERT(op.GetOperandsCount() == 2);
119 auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
120 auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
121 if (node_a && node_b) {
122 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
123 std::pair{offset_a, offset_b});
124 return {tracked, std::move(track)};
125 }
126 }
127 std::size_t i = op.GetOperandsCount();
128 while (i--) {
129 if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
130 // Constant buffer found in operand.
131 return found;
132 }
133 }
134 return {};
135 }
136 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
137 const auto& conditional_code = conditional->GetCode();
138 return TrackBindlessSampler(tracked, conditional_code,
139 static_cast<s64>(conditional_code.size()));
140 }
141 return {};
142}
143
144std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
145 const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
146 const NodeBlock& code, s64 cursor) {
147 const auto offset_imm = std::get<ImmediateNode>(*base_offset);
148 const auto& gpu_driver = registry.AccessGuestDriverProfile();
149 const u32 bindless_cv = NewCustomVariable();
150 const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
151 Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
152
153 Node cv_node = GetCustomVariable(bindless_cv);
154 Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
155 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
156 AmendNodeCv(amend_index, code[cursor]);
157
158 // TODO: Implement bindless index custom variable
159 auto track =
160 MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
161 return {tracked, track};
162}
163
164std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
165 s64 cursor) const {
166 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
167 // Constant buffer found, test if it's an immediate
168 const auto& offset = cbuf->GetOffset();
169 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
170 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
171 }
172 return {};
173 }
174 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
175 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
176 return {};
177 }
178 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
179 // register that it uses as an operand
180 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
181 if (!source) {
182 return {};
183 }
184 return TrackCbuf(source, code, new_cursor);
185 }
186 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
187 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
188 if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
189 // Cbuf found in operand.
190 return found;
191 }
192 }
193 return {};
194 }
195 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
196 const auto& conditional_code = conditional->GetCode();
197 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
198 }
199 return {};
200}
201
202std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
203 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
204 // that it uses as an operand
205 const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
206 const auto& found = result.first;
207 if (!found) {
208 return std::nullopt;
209 }
210 if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
211 return immediate->GetValue();
212 }
213 return std::nullopt;
214}
215
216std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
217 s64 cursor) const {
218 for (; cursor >= 0; --cursor) {
219 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
220 if (!found_node) {
221 return {};
222 }
223 const auto operation = std::get_if<OperationNode>(&*found_node);
224 ASSERT(operation);
225
226 const auto& target = (*operation)[0];
227 if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
228 if (gpr_target->GetIndex() == tracked->GetIndex()) {
229 return {(*operation)[1], new_cursor};
230 }
231 }
232 }
233 return {};
234}
235
236} // namespace VideoCommon::Shader
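[Editorial note, not part of the commit] The tracking pass above resolves a value by scanning the basic block backwards for the Assign operation that last wrote the register of interest and then recursing into whatever produced it. A heavily simplified, self-contained sketch of that backwards scan over a toy IR (names are illustrative, not yuzu types):

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    // Toy statement: assigns either an immediate or another register to `dest`.
    struct Assign {
        uint32_t dest;
        bool is_immediate;
        uint32_t value; // immediate value, or source register index
    };

    // Walk the block backwards from `cursor`, following register copies until an
    // immediate is found, mirroring the TrackRegister/TrackImmediate strategy.
    std::optional<uint32_t> TrackImmediate(const std::vector<Assign>& block, uint32_t reg,
                                           int64_t cursor) {
        for (; cursor >= 0; --cursor) {
            const Assign& stmt = block[static_cast<std::size_t>(cursor)];
            if (stmt.dest != reg) {
                continue;
            }
            if (stmt.is_immediate) {
                return stmt.value;
            }
            // Resume one statement earlier so an instruction that reads and writes
            // the same register cannot cause an infinite loop, as in the real code.
            return TrackImmediate(block, stmt.value, cursor - 1);
        }
        return std::nullopt;
    }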
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
deleted file mode 100644
index 22a933761..000000000
--- a/src/video_core/shader/transform_feedback.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
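[Editorial note, not part of the commit] BuildTransformFeedback above folds consecutive varying locations that share the same four-component base (location / 4) into one entry with a wider component count. A small self-contained sketch of just that grouping step, using hypothetical names:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct GroupedVarying {
        uint8_t location;    // first location of the group
        uint32_t components; // 1..4 consecutive components
    };

    // Fold runs of locations with the same base index (location / 4) into a single
    // varying, mirroring the component-merging loop in BuildTransformFeedback.
    std::vector<GroupedVarying> GroupVaryings(const std::vector<uint8_t>& locations) {
        std::vector<GroupedVarying> result;
        for (std::size_t offset = 0; offset < locations.size(); ++offset) {
            GroupedVarying varying{locations[offset], 1};
            const uint8_t base_index = static_cast<uint8_t>(locations[offset] / 4);
            while (offset + 1 < locations.size() && base_index == locations[offset + 1] / 4) {
                ++offset;
                ++varying.components;
            }
            result.push_back(varying);
        }
        return result;
    }

    // Example: locations {32, 33, 34, 35, 48} group into {32, 4 components} and
    // {48, 1 component}.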
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
deleted file mode 100644
index 77d05f64c..000000000
--- a/src/video_core/shader/transform_feedback.h
+++ /dev/null
@@ -1,23 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..78bf90c48
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,250 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <vector>
8
9#include "common/assert.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/object_pool.h"
12#include "video_core/dirty_flags.h"
13#include "video_core/engines/kepler_compute.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/memory_manager.h"
16#include "video_core/shader_cache.h"
17#include "video_core/shader_environment.h"
18
19namespace VideoCommon {
20
21void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
22 std::scoped_lock lock{invalidation_mutex};
23 InvalidatePagesInRegion(addr, size);
24 RemovePendingShaders();
25}
26
27void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
28 std::lock_guard lock{invalidation_mutex};
29 InvalidatePagesInRegion(addr, size);
30}
31
32void ShaderCache::SyncGuestHost() {
33 std::scoped_lock lock{invalidation_mutex};
34 RemovePendingShaders();
35}
36
37ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
38 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
39 Tegra::Engines::KeplerCompute& kepler_compute_)
40 : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
41 rasterizer{rasterizer_} {}
42
43bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
44 auto& dirty{maxwell3d.dirty.flags};
45 if (!dirty[VideoCommon::Dirty::Shaders]) {
46 return last_shaders_valid;
47 }
48 dirty[VideoCommon::Dirty::Shaders] = false;
49
50 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
51 for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
52 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
53 unique_hashes[index] = 0;
54 continue;
55 }
56 const auto& shader_config{maxwell3d.regs.shader_config[index]};
57 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
58 const GPUVAddr shader_addr{base_addr + shader_config.offset};
59 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
60 if (!cpu_shader_addr) {
61 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
62 last_shaders_valid = false;
63 return false;
64 }
65 const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
66 if (!shader_info) {
67 const u32 start_address{shader_config.offset};
68 GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
69 shader_info = MakeShaderInfo(env, *cpu_shader_addr);
70 }
71 shader_infos[index] = shader_info;
72 unique_hashes[index] = shader_info->unique_hash;
73 }
74 last_shaders_valid = true;
75 return true;
76}
77
78const ShaderInfo* ShaderCache::ComputeShader() {
79 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
80 const auto& qmd{kepler_compute.launch_description};
81 const GPUVAddr shader_addr{program_base + qmd.program_start};
82 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
83 if (!cpu_shader_addr) {
84 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
85 return nullptr;
86 }
87 if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
88 return shader;
89 }
90 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
91 return MakeShaderInfo(env, *cpu_shader_addr);
92}
93
94void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
95 const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
96 size_t env_index{};
97 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
98 for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
99 if (unique_hashes[index] == 0) {
100 continue;
101 }
102 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
103 auto& env{result.envs[index]};
104 const u32 start_address{maxwell3d.regs.shader_config[index].offset};
105 env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
106 env.SetCachedSize(shader_infos[index]->size_bytes);
107 result.env_ptrs[env_index++] = &env;
108 }
109}
110
111ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
112 std::scoped_lock lock{lookup_mutex};
113
114 const auto it = lookup_cache.find(addr);
115 if (it == lookup_cache.end()) {
116 return nullptr;
117 }
118 return it->second->data;
119}
120
121void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
122 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
123
124 const VAddr addr_end = addr + size;
125 Entry* const entry = NewEntry(addr, addr_end, data.get());
126
127 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
128 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
129 invalidation_cache[page].push_back(entry);
130 }
131
132 storage.push_back(std::move(data));
133
134 rasterizer.UpdatePagesCachedCount(addr, size, 1);
135}
136
137void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
138 const VAddr addr_end = addr + size;
139 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
140 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
141 auto it = invalidation_cache.find(page);
142 if (it == invalidation_cache.end()) {
143 continue;
144 }
145 InvalidatePageEntries(it->second, addr, addr_end);
146 }
147}
148
149void ShaderCache::RemovePendingShaders() {
150 if (marked_for_removal.empty()) {
151 return;
152 }
153 // Remove duplicates
154 std::ranges::sort(marked_for_removal);
155 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
156 marked_for_removal.end());
157
158 std::vector<ShaderInfo*> removed_shaders;
159 removed_shaders.reserve(marked_for_removal.size());
160
161 std::scoped_lock lock{lookup_mutex};
162
163 for (Entry* const entry : marked_for_removal) {
164 removed_shaders.push_back(entry->data);
165
166 const auto it = lookup_cache.find(entry->addr_start);
167 ASSERT(it != lookup_cache.end());
168 lookup_cache.erase(it);
169 }
170 marked_for_removal.clear();
171
172 if (!removed_shaders.empty()) {
173 RemoveShadersFromStorage(std::move(removed_shaders));
174 }
175}
176
177void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
178 size_t index = 0;
179 while (index < entries.size()) {
180 Entry* const entry = entries[index];
181 if (!entry->Overlaps(addr, addr_end)) {
182 ++index;
183 continue;
184 }
185
186 UnmarkMemory(entry);
187 RemoveEntryFromInvalidationCache(entry);
188 marked_for_removal.push_back(entry);
189 }
190}
191
192void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
193 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
194 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
195 const auto entries_it = invalidation_cache.find(page);
196 ASSERT(entries_it != invalidation_cache.end());
197 std::vector<Entry*>& entries = entries_it->second;
198
199 const auto entry_it = std::ranges::find(entries, entry);
200 ASSERT(entry_it != entries.end());
201 entries.erase(entry_it);
202 }
203}
204
205void ShaderCache::UnmarkMemory(Entry* entry) {
206 if (!entry->is_memory_marked) {
207 return;
208 }
209 entry->is_memory_marked = false;
210
211 const VAddr addr = entry->addr_start;
212 const size_t size = entry->addr_end - addr;
213 rasterizer.UpdatePagesCachedCount(addr, size, -1);
214}
215
216void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
217 // Remove them from the cache
218 std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
219 return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
220 });
221}
222
223ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
224 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
225 Entry* const entry_pointer = entry.get();
226
227 lookup_cache.emplace(addr, std::move(entry));
228 return entry_pointer;
229}
230
231const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
232 auto info = std::make_unique<ShaderInfo>();
233 if (const std::optional<u64> cached_hash{env.Analyze()}) {
234 info->unique_hash = *cached_hash;
235 info->size_bytes = env.CachedSize();
236 } else {
237 // Slow path, rarely hit in commercial games
238 // Build a control flow graph to get the real shader size
239 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
240 Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
241 info->unique_hash = env.CalculateHash();
242 info->size_bytes = env.ReadSize();
243 }
244 const size_t size_bytes{info->size_bytes};
245 const ShaderInfo* const result{info.get()};
246 Register(std::move(info), cpu_addr, size_bytes);
247 return result;
248}
249
250} // namespace VideoCommon
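[Editorial note, not part of the commit] The cache above tracks entries per 16 KiB page (PAGE_BITS = 14); Register and InvalidatePagesInRegion both iterate the half-open page range [addr >> PAGE_BITS, (addr_end + PAGE_SIZE - 1) >> PAGE_BITS). A minimal sketch of that page-range computation:

    #include <cstdint>
    #include <utility>

    constexpr uint64_t PAGE_BITS = 14;
    constexpr uint64_t PAGE_SIZE = uint64_t{1} << PAGE_BITS;

    // Half-open range of page indices touched by [addr, addr + size), matching the
    // loops in ShaderCache::Register and ShaderCache::InvalidatePagesInRegion.
    constexpr std::pair<uint64_t, uint64_t> PageRange(uint64_t addr, uint64_t size) {
        const uint64_t addr_end = addr + size;
        return {addr >> PAGE_BITS, (addr_end + PAGE_SIZE - 1) >> PAGE_BITS};
    }

    // A one-byte shader at 0x4000 touches only page 1; a two-byte shader starting
    // at 0x3FFF straddles pages 0 and 1.
    static_assert(PageRange(0x4000, 1) == std::pair<uint64_t, uint64_t>{1, 2});
    static_assert(PageRange(0x3FFF, 2) == std::pair<uint64_t, uint64_t>{0, 2});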
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 015a789d6..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -5,226 +5,147 @@
5#pragma once 5#pragma once
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array>
8#include <memory> 9#include <memory>
9#include <mutex> 10#include <mutex>
11#include <span>
10#include <unordered_map> 12#include <unordered_map>
11#include <utility> 13#include <utility>
12#include <vector> 14#include <vector>
13 15
14#include "common/assert.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/shader_environment.h"
19
20namespace Tegra {
21class MemoryManager;
22}
17 23
18namespace VideoCommon { 24namespace VideoCommon {
19 25
20template <class T> 26class GenericEnvironment;
27
28struct ShaderInfo {
29 u64 unique_hash{};
30 size_t size_bytes{};
31};
32
21class ShaderCache { 33class ShaderCache {
22 static constexpr u64 PAGE_BITS = 14; 34 static constexpr u64 PAGE_BITS = 14;
23 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; 35 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
24 36
37 static constexpr size_t NUM_PROGRAMS = 6;
38
25 struct Entry { 39 struct Entry {
26 VAddr addr_start; 40 VAddr addr_start;
27 VAddr addr_end; 41 VAddr addr_end;
28 T* data; 42 ShaderInfo* data;
29 43
30 bool is_memory_marked = true; 44 bool is_memory_marked = true;
31 45
32 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { 46 bool Overlaps(VAddr start, VAddr end) const noexcept {
33 return start < addr_end && addr_start < end; 47 return start < addr_end && addr_start < end;
34 } 48 }
35 }; 49 };
36 50
37public: 51public:
38 virtual ~ShaderCache() = default;
39
40 /// @brief Removes shaders inside a given region 52 /// @brief Removes shaders inside a given region
41 /// @note Checks for ranges 53 /// @note Checks for ranges
42 /// @param addr Start address of the invalidation 54 /// @param addr Start address of the invalidation
43 /// @param size Number of bytes of the invalidation 55 /// @param size Number of bytes of the invalidation
44 void InvalidateRegion(VAddr addr, std::size_t size) { 56 void InvalidateRegion(VAddr addr, size_t size);
45 std::scoped_lock lock{invalidation_mutex};
46 InvalidatePagesInRegion(addr, size);
47 RemovePendingShaders();
48 }
49 57
50 /// @brief Unmarks a memory region as cached and marks it for removal 58 /// @brief Unmarks a memory region as cached and marks it for removal
51 /// @param addr Start address of the CPU write operation 59 /// @param addr Start address of the CPU write operation
52 /// @param size Number of bytes of the CPU write operation 60 /// @param size Number of bytes of the CPU write operation
53 void OnCPUWrite(VAddr addr, std::size_t size) { 61 void OnCPUWrite(VAddr addr, size_t size);
54 std::lock_guard lock{invalidation_mutex};
55 InvalidatePagesInRegion(addr, size);
56 }
57 62
58 /// @brief Flushes delayed removal operations 63 /// @brief Flushes delayed removal operations
59 void SyncGuestHost() { 64 void SyncGuestHost();
60 std::scoped_lock lock{invalidation_mutex};
61 RemovePendingShaders();
62 }
63 65
64 /// @brief Tries to obtain a cached shader starting in a given address 66protected:
65 /// @note Doesn't check for ranges, the given address has to be the start of the shader 67 struct GraphicsEnvironments {
66 /// @param addr Start address of the shader, this doesn't cache for region 68 std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
67 /// @return Pointer to a valid shader, nullptr when nothing is found 69 std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
68 T* TryGet(VAddr addr) const {
69 std::scoped_lock lock{lookup_mutex};
70 70
71 const auto it = lookup_cache.find(addr); 71 std::span<Shader::Environment* const> Span() const noexcept {
72 if (it == lookup_cache.end()) { 72 return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
73 return nullptr;
74 } 73 }
75 return it->second->data; 74 };
76 }
77
78protected:
79 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
80 75
81 /// @brief Register in the cache a given entry 76 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
82 /// @param data Shader to store in the cache 77 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
83 /// @param addr Start address of the shader that will be registered 78 Tegra::Engines::KeplerCompute& kepler_compute_);
84 /// @param size Size in bytes of the shader
85 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
86 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
87 79
88 const VAddr addr_end = addr + size; 80 /// @brief Update the hashes and information of shader stages
89 Entry* const entry = NewEntry(addr, addr_end, data.get()); 81 /// @param unique_hashes Shader hashes to store into when a stage is enabled
82 /// @return True on success, false on error
83 bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
90 84
91 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; 85 /// @brief Returns information about the current compute shader
92 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { 86 /// @return Pointer to a valid shader, nullptr on error
93 invalidation_cache[page].push_back(entry); 87 const ShaderInfo* ComputeShader();
94 }
95 88
96 storage.push_back(std::move(data)); 89 /// @brief Collect the current graphics environments
90 void GetGraphicsEnvironments(GraphicsEnvironments& result,
91 const std::array<u64, NUM_PROGRAMS>& unique_hashes);
97 92
98 rasterizer.UpdatePagesCachedCount(addr, size, 1); 93 Tegra::MemoryManager& gpu_memory;
99 } 94 Tegra::Engines::Maxwell3D& maxwell3d;
95 Tegra::Engines::KeplerCompute& kepler_compute;
100 96
101 /// @brief Called when a shader is going to be removed 97 std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
102 /// @param shader Shader that will be removed 98 bool last_shaders_valid = false;
103 /// @pre invalidation_cache is locked
104 /// @pre lookup_mutex is locked
105 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
106 99
107private: 100private:
101 /// @brief Tries to obtain a cached shader starting in a given address
102 /// @note Doesn't check for ranges, the given address has to be the start of the shader
103 /// @param addr Start address of the shader, this doesn't cache for region
104 /// @return Pointer to a valid shader, nullptr when nothing is found
105 ShaderInfo* TryGet(VAddr addr) const;
106
107 /// @brief Register in the cache a given entry
108 /// @param data Shader to store in the cache
109 /// @param addr Start address of the shader that will be registered
110 /// @param size Size in bytes of the shader
111 void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
112
108 /// @brief Invalidate pages in a given region 113 /// @brief Invalidate pages in a given region
109 /// @pre invalidation_mutex is locked 114 /// @pre invalidation_mutex is locked
110 void InvalidatePagesInRegion(VAddr addr, std::size_t size) { 115 void InvalidatePagesInRegion(VAddr addr, size_t size);
111 const VAddr addr_end = addr + size;
112 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
113 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
114 auto it = invalidation_cache.find(page);
115 if (it == invalidation_cache.end()) {
116 continue;
117 }
118 InvalidatePageEntries(it->second, addr, addr_end);
119 }
120 }
121 116
122 /// @brief Remove shaders marked for deletion 117 /// @brief Remove shaders marked for deletion
123 /// @pre invalidation_mutex is locked 118 /// @pre invalidation_mutex is locked
124 void RemovePendingShaders() { 119 void RemovePendingShaders();
125 if (marked_for_removal.empty()) {
126 return;
127 }
128 // Remove duplicates
129 std::sort(marked_for_removal.begin(), marked_for_removal.end());
130 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
131 marked_for_removal.end());
132
133 std::vector<T*> removed_shaders;
134 removed_shaders.reserve(marked_for_removal.size());
135
136 std::scoped_lock lock{lookup_mutex};
137
138 for (Entry* const entry : marked_for_removal) {
139 removed_shaders.push_back(entry->data);
140
141 const auto it = lookup_cache.find(entry->addr_start);
142 ASSERT(it != lookup_cache.end());
143 lookup_cache.erase(it);
144 }
145 marked_for_removal.clear();
146
147 if (!removed_shaders.empty()) {
148 RemoveShadersFromStorage(std::move(removed_shaders));
149 }
150 }
151 120
152 /// @brief Invalidates entries in a given range for the passed page 121 /// @brief Invalidates entries in a given range for the passed page
153 /// @param entries Vector of entries in the page, it will be modified on overlaps 122 /// @param entries Vector of entries in the page, it will be modified on overlaps
154 /// @param addr Start address of the invalidation 123 /// @param addr Start address of the invalidation
155 /// @param addr_end Non-inclusive end address of the invalidation 124 /// @param addr_end Non-inclusive end address of the invalidation
156 /// @pre invalidation_mutex is locked 125 /// @pre invalidation_mutex is locked
157 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { 126 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
158 std::size_t index = 0;
159 while (index < entries.size()) {
160 Entry* const entry = entries[index];
161 if (!entry->Overlaps(addr, addr_end)) {
162 ++index;
163 continue;
164 }
165
166 UnmarkMemory(entry);
167 RemoveEntryFromInvalidationCache(entry);
168 marked_for_removal.push_back(entry);
169 }
170 }
171 127
172 /// @brief Removes all references to an entry in the invalidation cache 128 /// @brief Removes all references to an entry in the invalidation cache
173 /// @param entry Entry to remove from the invalidation cache 129 /// @param entry Entry to remove from the invalidation cache
174 /// @pre invalidation_mutex is locked 130 /// @pre invalidation_mutex is locked
175 void RemoveEntryFromInvalidationCache(const Entry* entry) { 131 void RemoveEntryFromInvalidationCache(const Entry* entry);
176 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
177 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
178 const auto entries_it = invalidation_cache.find(page);
179 ASSERT(entries_it != invalidation_cache.end());
180 std::vector<Entry*>& entries = entries_it->second;
181
182 const auto entry_it = std::find(entries.begin(), entries.end(), entry);
183 ASSERT(entry_it != entries.end());
184 entries.erase(entry_it);
185 }
186 }
187 132
188 /// @brief Unmarks an entry from the rasterizer cache 133 /// @brief Unmarks an entry from the rasterizer cache
189 /// @param entry Entry to unmark from memory 134 /// @param entry Entry to unmark from memory
190 void UnmarkMemory(Entry* entry) { 135 void UnmarkMemory(Entry* entry);
191 if (!entry->is_memory_marked) {
192 return;
193 }
194 entry->is_memory_marked = false;
195
196 const VAddr addr = entry->addr_start;
197 const std::size_t size = entry->addr_end - addr;
198 rasterizer.UpdatePagesCachedCount(addr, size, -1);
199 }
200 136
201 /// @brief Removes a vector of shaders from a list 137 /// @brief Removes a vector of shaders from a list
202 /// @param removed_shaders Shaders to be removed from the storage 138 /// @param removed_shaders Shaders to be removed from the storage
203 /// @pre invalidation_mutex is locked 139 /// @pre invalidation_mutex is locked
204 /// @pre lookup_mutex is locked 140 /// @pre lookup_mutex is locked
205 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { 141 void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
206 // Notify removals
207 for (T* const shader : removed_shaders) {
208 OnShaderRemoval(shader);
209 }
210
211 // Remove them from the cache
212 const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
213 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
214 removed_shaders.end();
215 };
216 std::erase_if(storage, is_removed);
217 }
218 142
219 /// @brief Creates a new entry in the lookup cache and returns its pointer 143 /// @brief Creates a new entry in the lookup cache and returns its pointer
220 /// @pre lookup_mutex is locked 144 /// @pre lookup_mutex is locked
221 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { 145 Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
222 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
223 Entry* const entry_pointer = entry.get();
224 146
225 lookup_cache.emplace(addr, std::move(entry)); 147 /// @brief Create a new shader entry and register it
226 return entry_pointer; 148 const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
227 }
228 149
229 VideoCore::RasterizerInterface& rasterizer; 150 VideoCore::RasterizerInterface& rasterizer;
230 151
@@ -233,7 +154,7 @@ private:
233 154
234 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; 155 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
235 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; 156 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
236 std::vector<std::unique_ptr<T>> storage; 157 std::vector<std::unique_ptr<ShaderInfo>> storage;
237 std::vector<Entry*> marked_for_removal; 158 std::vector<Entry*> marked_for_removal;
238}; 159};
239 160
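[Editorial note, not part of the commit] GraphicsEnvironments::Span in the new header returns only the populated prefix of env_ptrs by spanning up to the first null pointer. A tiny sketch of the same idea with placeholder types (assumes unused slots are null):

    #include <algorithm>
    #include <array>
    #include <span>

    struct Env {}; // stand-in for Shader::Environment

    // Span over the leading non-null pointers, as GraphicsEnvironments::Span does.
    std::span<Env* const> PopulatedPrefix(const std::array<Env*, 6>& env_ptrs) {
        return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
    }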
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..8a4581c19
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,460 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <filesystem>
6#include <fstream>
7#include <memory>
8#include <optional>
9#include <utility>
10
11#include "common/assert.h"
12#include "common/cityhash.h"
13#include "common/common_types.h"
14#include "common/div_ceil.h"
15#include "common/fs/fs.h"
16#include "common/logging/log.h"
17#include "shader_recompiler/environment.h"
18#include "video_core/memory_manager.h"
19#include "video_core/shader_environment.h"
20#include "video_core/textures/texture.h"
21
22namespace VideoCommon {
23
24constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
25
26constexpr size_t INST_SIZE = sizeof(u64);
27
28using Maxwell = Tegra::Engines::Maxwell3D::Regs;
29
30static u64 MakeCbufKey(u32 index, u32 offset) {
31 return (static_cast<u64>(index) << 32) | offset;
32}
33
34static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
35 switch (entry.texture_type) {
36 case Tegra::Texture::TextureType::Texture1D:
37 return Shader::TextureType::Color1D;
38 case Tegra::Texture::TextureType::Texture2D:
39 case Tegra::Texture::TextureType::Texture2DNoMipmap:
40 return Shader::TextureType::Color2D;
41 case Tegra::Texture::TextureType::Texture3D:
42 return Shader::TextureType::Color3D;
43 case Tegra::Texture::TextureType::TextureCubemap:
44 return Shader::TextureType::ColorCube;
45 case Tegra::Texture::TextureType::Texture1DArray:
46 return Shader::TextureType::ColorArray1D;
47 case Tegra::Texture::TextureType::Texture2DArray:
48 return Shader::TextureType::ColorArray2D;
49 case Tegra::Texture::TextureType::Texture1DBuffer:
50 return Shader::TextureType::Buffer;
51 case Tegra::Texture::TextureType::TextureCubeArray:
52 return Shader::TextureType::ColorArrayCube;
53 default:
54 throw Shader::NotImplementedException("Unknown texture type");
55 }
56}
57
58GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
59 u32 start_address_)
60 : gpu_memory{&gpu_memory_}, program_base{program_base_} {
61 start_address = start_address_;
62}
63
64GenericEnvironment::~GenericEnvironment() = default;
65
66u32 GenericEnvironment::TextureBoundBuffer() const {
67 return texture_bound;
68}
69
70u32 GenericEnvironment::LocalMemorySize() const {
71 return local_memory_size;
72}
73
74u32 GenericEnvironment::SharedMemorySize() const {
75 return shared_memory_size;
76}
77
78std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
79 return workgroup_size;
80}
81
82u64 GenericEnvironment::ReadInstruction(u32 address) {
83 read_lowest = std::min(read_lowest, address);
84 read_highest = std::max(read_highest, address);
85
86 if (address >= cached_lowest && address < cached_highest) {
87 return code[(address - cached_lowest) / INST_SIZE];
88 }
89 has_unbound_instructions = true;
90 return gpu_memory->Read<u64>(program_base + address);
91}
92
93std::optional<u64> GenericEnvironment::Analyze() {
94 const std::optional<u64> size{TryFindSize()};
95 if (!size) {
96 return std::nullopt;
97 }
98 cached_lowest = start_address;
99 cached_highest = start_address + static_cast<u32>(*size);
100 return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
101}
102
103void GenericEnvironment::SetCachedSize(size_t size_bytes) {
104 cached_lowest = start_address;
105 cached_highest = start_address + static_cast<u32>(size_bytes);
106 code.resize(CachedSize());
107 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
108}
109
110size_t GenericEnvironment::CachedSize() const noexcept {
111 return cached_highest - cached_lowest + INST_SIZE;
112}
113
114size_t GenericEnvironment::ReadSize() const noexcept {
115 return read_highest - read_lowest + INST_SIZE;
116}
117
118bool GenericEnvironment::CanBeSerialized() const noexcept {
119 return !has_unbound_instructions;
120}
121
122u64 GenericEnvironment::CalculateHash() const {
123 const size_t size{ReadSize()};
124 const auto data{std::make_unique<char[]>(size)};
125 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
126 return Common::CityHash64(data.get(), size);
127}
128
129void GenericEnvironment::Serialize(std::ofstream& file) const {
130 const u64 code_size{static_cast<u64>(CachedSize())};
131 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
132 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
133
134 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
135 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
136 .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
137 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
138 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
139 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
140 .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
141 .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
142 .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
143 .write(reinterpret_cast<const char*>(code.data()), code_size);
144 for (const auto [key, type] : texture_types) {
145 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
146 .write(reinterpret_cast<const char*>(&type), sizeof(type));
147 }
148 for (const auto [key, type] : cbuf_values) {
149 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
150 .write(reinterpret_cast<const char*>(&type), sizeof(type));
151 }
152 if (stage == Shader::Stage::Compute) {
153 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
154 .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
155 } else {
156 file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
157 if (stage == Shader::Stage::Geometry) {
158 file.write(reinterpret_cast<const char*>(&gp_passthrough_mask),
159 sizeof(gp_passthrough_mask));
160 }
161 }
162}
163
164std::optional<u64> GenericEnvironment::TryFindSize() {
165 static constexpr size_t BLOCK_SIZE = 0x1000;
166 static constexpr size_t MAXIMUM_SIZE = 0x100000;
167
168 static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
169 static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
170
171 GPUVAddr guest_addr{program_base + start_address};
172 size_t offset{0};
173 size_t size{BLOCK_SIZE};
174 while (size <= MAXIMUM_SIZE) {
175 code.resize(size / INST_SIZE);
176 u64* const data = code.data() + offset / INST_SIZE;
177 gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
178 for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
179 const u64 inst = data[index / INST_SIZE];
180 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
181 return offset + index;
182 }
183 }
184 guest_addr += BLOCK_SIZE;
185 size += BLOCK_SIZE;
186 offset += BLOCK_SIZE;
187 }
188 return std::nullopt;
189}
190
191Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
192 bool via_header_index, u32 raw) {
193 const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
194 const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
195 Tegra::Texture::TICEntry entry;
196 gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
197 const Shader::TextureType result{ConvertType(entry)};
198 texture_types.emplace(raw, result);
199 return result;
200}
201
202GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
203 Tegra::MemoryManager& gpu_memory_,
204 Maxwell::ShaderProgram program, GPUVAddr program_base_,
205 u32 start_address_)
206 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
207 gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
208 gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
209 switch (program) {
210 case Maxwell::ShaderProgram::VertexA:
211 stage = Shader::Stage::VertexA;
212 stage_index = 0;
213 break;
214 case Maxwell::ShaderProgram::VertexB:
215 stage = Shader::Stage::VertexB;
216 stage_index = 0;
217 break;
218 case Maxwell::ShaderProgram::TesselationControl:
219 stage = Shader::Stage::TessellationControl;
220 stage_index = 1;
221 break;
222 case Maxwell::ShaderProgram::TesselationEval:
223 stage = Shader::Stage::TessellationEval;
224 stage_index = 2;
225 break;
226 case Maxwell::ShaderProgram::Geometry:
227 stage = Shader::Stage::Geometry;
228 stage_index = 3;
229 break;
230 case Maxwell::ShaderProgram::Fragment:
231 stage = Shader::Stage::Fragment;
232 stage_index = 4;
233 break;
234 default:
235 UNREACHABLE_MSG("Invalid program={}", program);
236 break;
237 }
238 const u64 local_size{sph.LocalMemorySize()};
239 ASSERT(local_size <= std::numeric_limits<u32>::max());
240 local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
241 texture_bound = maxwell3d->regs.tex_cb_index;
242}
243
244u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
245 const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
246 ASSERT(cbuf.enabled);
247 u32 value{};
248 if (cbuf_offset < cbuf.size) {
249 value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
250 }
251 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
252 return value;
253}
254
255Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
256 const auto& regs{maxwell3d->regs};
257 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
258 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
259}
260
261ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
262 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
263 u32 start_address_)
264 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
265 &kepler_compute_} {
266 const auto& qmd{kepler_compute->launch_description};
267 stage = Shader::Stage::Compute;
268 local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
269 texture_bound = kepler_compute->regs.tex_cb_index;
270 shared_memory_size = qmd.shared_alloc;
271 workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
272}
273
274u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
275 const auto& qmd{kepler_compute->launch_description};
276 ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
277 const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
278 u32 value{};
279 if (cbuf_offset < cbuf.size) {
280 value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
281 }
282 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
283 return value;
284}
285
286Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
287 const auto& regs{kepler_compute->regs};
288 const auto& qmd{kepler_compute->launch_description};
289 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
290}
291
292void FileEnvironment::Deserialize(std::ifstream& file) {
293 u64 code_size{};
294 u64 num_texture_types{};
295 u64 num_cbuf_values{};
296 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
297 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
298 .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
299 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
300 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
301 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
302 .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
303 .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
304 .read(reinterpret_cast<char*>(&stage), sizeof(stage));
305 code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
306 file.read(reinterpret_cast<char*>(code.get()), code_size);
307 for (size_t i = 0; i < num_texture_types; ++i) {
308 u32 key;
309 Shader::TextureType type;
310 file.read(reinterpret_cast<char*>(&key), sizeof(key))
311 .read(reinterpret_cast<char*>(&type), sizeof(type));
312 texture_types.emplace(key, type);
313 }
314 for (size_t i = 0; i < num_cbuf_values; ++i) {
315 u64 key;
316 u32 value;
317 file.read(reinterpret_cast<char*>(&key), sizeof(key))
318 .read(reinterpret_cast<char*>(&value), sizeof(value));
319 cbuf_values.emplace(key, value);
320 }
321 if (stage == Shader::Stage::Compute) {
322 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
323 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
324 } else {
325 file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
326 if (stage == Shader::Stage::Geometry) {
327 file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
328 }
329 }
330}
331
332u64 FileEnvironment::ReadInstruction(u32 address) {
333 if (address < read_lowest || address > read_highest) {
334 throw Shader::LogicError("Out of bounds address {}", address);
335 }
336 return code[(address - read_lowest) / sizeof(u64)];
337}
338
339u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
340 const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
341 if (it == cbuf_values.end()) {
342 throw Shader::LogicError("Uncached constant buffer value read");
343 }
344 return it->second;
345}
346
347Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
348 const auto it{texture_types.find(handle)};
349 if (it == texture_types.end()) {
350 throw Shader::LogicError("Uncached read texture type");
351 }
352 return it->second;
353}
354
355u32 FileEnvironment::LocalMemorySize() const {
356 return local_memory_size;
357}
358
359u32 FileEnvironment::SharedMemorySize() const {
360 return shared_memory_size;
361}
362
363u32 FileEnvironment::TextureBoundBuffer() const {
364 return texture_bound;
365}
366
367std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
368 return workgroup_size;
369}
370
371void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
372 const std::filesystem::path& filename, u32 cache_version) try {
373 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
374 file.exceptions(std::ifstream::failbit);
375 if (!file.is_open()) {
376 LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
377 Common::FS::PathToUTF8String(filename));
378 return;
379 }
380 if (file.tellp() == 0) {
381 // Write header
382 file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
383 .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
384 }
385 if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
386 return;
387 }
388 const u32 num_envs{static_cast<u32>(envs.size())};
389 file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
390 for (const GenericEnvironment* const env : envs) {
391 env->Serialize(file);
392 }
393 file.write(key.data(), key.size_bytes());
394
395} catch (const std::ios_base::failure& e) {
396 LOG_ERROR(Common_Filesystem, "{}", e.what());
397 if (!Common::FS::RemoveFile(filename)) {
398 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
399 Common::FS::PathToUTF8String(filename));
400 }
401}
402
403void LoadPipelines(
404 std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
405 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
406 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
407 std::ifstream file(filename, std::ios::binary | std::ios::ate);
408 if (!file.is_open()) {
409 return;
410 }
411 file.exceptions(std::ifstream::failbit);
412 const auto end{file.tellg()};
413 file.seekg(0, std::ios::beg);
414
415 std::array<char, 8> magic_number;
416 u32 cache_version;
417 file.read(magic_number.data(), magic_number.size())
418 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
419 if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) {
420 file.close();
421 if (Common::FS::RemoveFile(filename)) {
422 if (magic_number != MAGIC_NUMBER) {
423 LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
424 }
425 if (cache_version != expected_cache_version) {
426 LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
427 }
428 } else {
429 LOG_ERROR(Common_Filesystem,
430 "Invalid pipeline cache file and failed to delete it in \"{}\"",
431 Common::FS::PathToUTF8String(filename));
432 }
433 return;
434 }
435 while (file.tellg() != end) {
436 if (stop_loading.stop_requested()) {
437 return;
438 }
439 u32 num_envs{};
440 file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
441 std::vector<FileEnvironment> envs(num_envs);
442 for (FileEnvironment& env : envs) {
443 env.Deserialize(file);
444 }
445 if (envs.front().ShaderStage() == Shader::Stage::Compute) {
446 load_compute(file, std::move(envs.front()));
447 } else {
448 load_graphics(file, std::move(envs));
449 }
450 }
451
452} catch (const std::ios_base::failure& e) {
453 LOG_ERROR(Common_Filesystem, "{}", e.what());
454 if (!Common::FS::RemoveFile(filename)) {
455 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
456 Common::FS::PathToUTF8String(filename));
457 }
458}
459
460} // namespace VideoCommon
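The two functions above define the whole on-disk layout of the pipeline cache: an 8-byte magic string plus a u32 version, followed by an append-only sequence of records, each holding a u32 environment count, the serialized environments, and the raw pipeline key. A minimal standalone sketch of a header check in the same spirit (the magic string and version below are placeholders, not the constants defined elsewhere in this file):

#include <array>
#include <cstdint>
#include <filesystem>
#include <fstream>

// Placeholder constants for illustration; the real MAGIC_NUMBER and cache
// version are defined near the top of shader_environment.cpp.
constexpr std::array<char, 8> kMagic{'p', 'i', 'p', 'e', 'c', 'a', 'c', 'h'};
constexpr std::uint32_t kVersion = 1;

bool HasValidHeader(const std::filesystem::path& path) {
    std::ifstream file(path, std::ios::binary);
    if (!file.is_open()) {
        return false;
    }
    std::array<char, 8> magic{};
    std::uint32_t version{};
    file.read(magic.data(), magic.size())
        .read(reinterpret_cast<char*>(&version), sizeof(version));
    // A mismatch here is what makes LoadPipelines delete and rebuild the cache.
    return file.good() && magic == kMagic && version == kVersion;
}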
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
new file mode 100644
index 000000000..2079979db
--- /dev/null
+++ b/src/video_core/shader_environment.h
@@ -0,0 +1,183 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <filesystem>
10#include <iosfwd>
11#include <limits>
12#include <memory>
13#include <optional>
14#include <span>
15#include <type_traits>
16#include <unordered_map>
17#include <vector>
18
19#include "common/common_types.h"
20#include "common/unique_function.h"
21#include "shader_recompiler/environment.h"
22#include "video_core/engines/kepler_compute.h"
23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/textures/texture.h"
25
26namespace Tegra {
27class MemoryManager;
28}
29
30namespace VideoCommon {
31
32class GenericEnvironment : public Shader::Environment {
33public:
34 explicit GenericEnvironment() = default;
35 explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
36 u32 start_address_);
37
38 ~GenericEnvironment() override;
39
40 [[nodiscard]] u32 TextureBoundBuffer() const final;
41
42 [[nodiscard]] u32 LocalMemorySize() const final;
43
44 [[nodiscard]] u32 SharedMemorySize() const final;
45
46 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
47
48 [[nodiscard]] u64 ReadInstruction(u32 address) final;
49
50 [[nodiscard]] std::optional<u64> Analyze();
51
52 void SetCachedSize(size_t size_bytes);
53
54 [[nodiscard]] size_t CachedSize() const noexcept;
55
56 [[nodiscard]] size_t ReadSize() const noexcept;
57
58 [[nodiscard]] bool CanBeSerialized() const noexcept;
59
60 [[nodiscard]] u64 CalculateHash() const;
61
62 void Serialize(std::ofstream& file) const;
63
64protected:
65 std::optional<u64> TryFindSize();
66
67 Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
68 u32 raw);
69
70 Tegra::MemoryManager* gpu_memory{};
71 GPUVAddr program_base{};
72
73 std::vector<u64> code;
74 std::unordered_map<u32, Shader::TextureType> texture_types;
75 std::unordered_map<u64, u32> cbuf_values;
76
77 u32 local_memory_size{};
78 u32 texture_bound{};
79 u32 shared_memory_size{};
80 std::array<u32, 3> workgroup_size{};
81
82 u32 read_lowest = std::numeric_limits<u32>::max();
83 u32 read_highest = 0;
84
85 u32 cached_lowest = std::numeric_limits<u32>::max();
86 u32 cached_highest = 0;
87
88 bool has_unbound_instructions = false;
89};
90
91class GraphicsEnvironment final : public GenericEnvironment {
92public:
93 explicit GraphicsEnvironment() = default;
94 explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
95 Tegra::MemoryManager& gpu_memory_,
96 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
97 GPUVAddr program_base_, u32 start_address_);
98
99 ~GraphicsEnvironment() override = default;
100
101 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
102
103 Shader::TextureType ReadTextureType(u32 handle) override;
104
105private:
106 Tegra::Engines::Maxwell3D* maxwell3d{};
107 size_t stage_index{};
108};
109
110class ComputeEnvironment final : public GenericEnvironment {
111public:
112 explicit ComputeEnvironment() = default;
113 explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
114 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
115 u32 start_address_);
116
117 ~ComputeEnvironment() override = default;
118
119 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
120
121 Shader::TextureType ReadTextureType(u32 handle) override;
122
123private:
124 Tegra::Engines::KeplerCompute* kepler_compute{};
125};
126
127class FileEnvironment final : public Shader::Environment {
128public:
129 FileEnvironment() = default;
130 ~FileEnvironment() override = default;
131
132 FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
133 FileEnvironment(FileEnvironment&&) noexcept = default;
134
135 FileEnvironment& operator=(const FileEnvironment&) = delete;
136 FileEnvironment(const FileEnvironment&) = delete;
137
138 void Deserialize(std::ifstream& file);
139
140 [[nodiscard]] u64 ReadInstruction(u32 address) override;
141
142 [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
143
144 [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
145
146 [[nodiscard]] u32 LocalMemorySize() const override;
147
148 [[nodiscard]] u32 SharedMemorySize() const override;
149
150 [[nodiscard]] u32 TextureBoundBuffer() const override;
151
152 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
153
154private:
155 std::unique_ptr<u64[]> code;
156 std::unordered_map<u32, Shader::TextureType> texture_types;
157 std::unordered_map<u64, u32> cbuf_values;
158 std::array<u32, 3> workgroup_size{};
159 u32 local_memory_size{};
160 u32 shared_memory_size{};
161 u32 texture_bound{};
162 u32 read_lowest{};
163 u32 read_highest{};
164};
165
166void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
167 const std::filesystem::path& filename, u32 cache_version);
168
169template <typename Key, typename Envs>
170void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename,
171 u32 cache_version) {
172 static_assert(std::is_trivially_copyable_v<Key>);
173 static_assert(std::has_unique_object_representations_v<Key>);
174 SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
175 std::span(envs.data(), envs.size()), filename, cache_version);
176}
177
178void LoadPipelines(
179 std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
180 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
181 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
182
183} // namespace VideoCommon
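Because the templated SerializePipeline overload only reinterprets the key as bytes, any trivially copyable key struct with unique object representations can be used. A hypothetical call site might look like this (ComputePipelineCacheKey, SaveEntry and CACHE_VERSION are illustrative names, not identifiers from this diff):

#include <array>
#include <cstdint>
#include <filesystem>
#include <span>

#include "video_core/shader_environment.h"

// Hypothetical 24-byte key with no padding, so the static_asserts in
// SerializePipeline hold.
struct ComputePipelineCacheKey {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::array<std::uint32_t, 3> workgroup;
};

void SaveEntry(const ComputePipelineCacheKey& key,
               std::span<const VideoCommon::GenericEnvironment* const> envs,
               const std::filesystem::path& cache_file) {
    constexpr std::uint32_t CACHE_VERSION = 1; // illustrative constant
    VideoCommon::SerializePipeline(key, envs, cache_file, CACHE_VERSION);
}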
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index 693e47158..dc6995b46 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -2,42 +2,35 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <atomic>
6#include <chrono>
7#include <optional>
8
6#include "video_core/shader_notify.h" 9#include "video_core/shader_notify.h"
7 10
8using namespace std::chrono_literals; 11using namespace std::chrono_literals;
9 12
10namespace VideoCore { 13namespace VideoCore {
11namespace {
12constexpr auto UPDATE_TICK = 32ms;
13}
14
15ShaderNotify::ShaderNotify() = default;
16ShaderNotify::~ShaderNotify() = default;
17 14
18std::size_t ShaderNotify::GetShadersBuilding() { 15const auto TIME_TO_STOP_REPORTING = 2s;
19 const auto now = std::chrono::high_resolution_clock::now(); 16
20 const auto diff = now - last_update; 17int ShaderNotify::ShadersBuilding() noexcept {
21 if (diff > UPDATE_TICK) { 18 const int now_complete = num_complete.load(std::memory_order::relaxed);
22 std::shared_lock lock(mutex); 19 const int now_building = num_building.load(std::memory_order::relaxed);
23 last_updated_count = accurate_count; 20 if (now_complete == now_building) {
21 const auto now = std::chrono::high_resolution_clock::now();
22 if (completed && num_complete == num_when_completed) {
23 if (now - complete_time > TIME_TO_STOP_REPORTING) {
24 report_base = now_complete;
25 completed = false;
26 }
27 } else {
28 completed = true;
29 num_when_completed = num_complete;
30 complete_time = now;
31 }
24 } 32 }
25 return last_updated_count; 33 return now_building - report_base;
26}
27
28std::size_t ShaderNotify::GetShadersBuildingAccurate() {
29 std::shared_lock lock{mutex};
30 return accurate_count;
31}
32
33void ShaderNotify::MarkShaderComplete() {
34 std::unique_lock lock{mutex};
35 accurate_count--;
36}
37
38void ShaderNotify::MarkSharderBuilding() {
39 std::unique_lock lock{mutex};
40 accurate_count++;
41} 34}
42 35
43} // namespace VideoCore 36} // namespace VideoCore
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index a9c92d179..ad363bfb5 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -4,26 +4,30 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <chrono> 8#include <chrono>
8#include <shared_mutex> 9#include <optional>
9#include "common/common_types.h"
10 10
11namespace VideoCore { 11namespace VideoCore {
12class ShaderNotify { 12class ShaderNotify {
13public: 13public:
14 ShaderNotify(); 14 [[nodiscard]] int ShadersBuilding() noexcept;
15 ~ShaderNotify();
16 15
17 std::size_t GetShadersBuilding(); 16 void MarkShaderComplete() noexcept {
18 std::size_t GetShadersBuildingAccurate(); 17 ++num_complete;
18 }
19 19
20 void MarkShaderComplete(); 20 void MarkShaderBuilding() noexcept {
21 void MarkSharderBuilding(); 21 ++num_building;
22 }
22 23
23private: 24private:
24 std::size_t last_updated_count{}; 25 std::atomic_int num_building{};
25 std::size_t accurate_count{}; 26 std::atomic_int num_complete{};
26 std::shared_mutex mutex; 27 int report_base{};
27 std::chrono::high_resolution_clock::time_point last_update{}; 28
29 bool completed{};
30 int num_when_completed{};
31 std::chrono::high_resolution_clock::time_point complete_time;
28}; 32};
29} // namespace VideoCore 33} // namespace VideoCore
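The rewritten ShaderNotify drops the shared_mutex in favour of two monotonically increasing atomic counters: the value shown to the user is num_building - report_base, and report_base only catches up to the completed count once the two counters have matched for TIME_TO_STOP_REPORTING, so the overlay keeps the final total visible for a couple of seconds before dropping to zero. A stripped-down sketch of the same idea, with illustrative names and a slightly simplified idle check:

#include <atomic>
#include <chrono>
#include <optional>

class BuildCounter {
public:
    void MarkBuilding() noexcept { ++num_building; }
    void MarkComplete() noexcept { ++num_complete; }

    // Number of shaders to report as building right now.
    int Report() noexcept {
        const int complete = num_complete.load(std::memory_order_relaxed);
        const int building = num_building.load(std::memory_order_relaxed);
        if (complete != building) {
            idle_since.reset(); // still compiling
        } else {
            const auto now = std::chrono::steady_clock::now();
            if (!idle_since) {
                idle_since = now; // just became idle, keep reporting for a while
            } else if (now - *idle_since > std::chrono::seconds{2}) {
                report_base = complete; // idle long enough, stop reporting
            }
        }
        return building - report_base;
    }

private:
    std::atomic_int num_building{};
    std::atomic_int num_complete{};
    int report_base{};
    std::optional<std::chrono::steady_clock::time_point> idle_since;
};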
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
43 return "Invalid"; 43 return "Invalid";
44} 44}
45 45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { 46std::string Name(const ImageViewBase& image_view) {
47 const u32 width = image_view.size.width; 47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height; 48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth; 49 const u32 depth = image_view.size.depth;
@@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
51 const u32 num_layers = image_view.range.extent.layers; 51 const u32 num_layers = image_view.range.extent.layers;
52 52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; 53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) { 54 switch (image_view.type) {
55 case ImageViewType::e1D: 55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level); 56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D: 57 case ImageViewType::e2D:
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index a48413983..c6cf0583f 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -255,8 +255,7 @@ struct RenderTargets;
255 255
256[[nodiscard]] std::string Name(const ImageBase& image); 256[[nodiscard]] std::string Name(const ImageBase& image);
257 257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view, 258[[nodiscard]] std::string Name(const ImageViewBase& image_view);
259 std::optional<ImageViewType> type = std::nullopt);
260 259
261[[nodiscard]] std::string Name(const RenderTargets& render_targets); 260[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262 261
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index e8d632f9e..450becbeb 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
36 } 36 }
37} 37}
38 38
39ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
40 : format{info.format}, type{ImageViewType::Buffer}, size{
41 .width = info.size.width,
42 .height = 1,
43 .depth = 1,
44 } {
45 ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
46}
47
39ImageViewBase::ImageViewBase(const NullImageParams&) {} 48ImageViewBase::ImageViewBase(const NullImageParams&) {}
40 49
41} // namespace VideoCommon 50} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 73954167e..903f715c5 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
27struct ImageViewBase { 27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, 28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id); 29 ImageId image_id);
30 explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
30 explicit ImageViewBase(const NullImageParams&); 31 explicit ImageViewBase(const NullImageParams&);
31 32
32 [[nodiscard]] bool IsBuffer() const noexcept { 33 [[nodiscard]] bool IsBuffer() const noexcept {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 85ce06d56..f34c9d9ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -117,6 +117,9 @@ public:
117 /// Return a reference to the given image view id 117 /// Return a reference to the given image view id
118 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; 118 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
119 119
120 /// Mark an image as modified from the GPU
121 void MarkModification(ImageId id) noexcept;
122
120 /// Fill image_view_ids with the graphics images in indices 123 /// Fill image_view_ids with the graphics images in indices
121 void FillGraphicsImageViews(std::span<const u32> indices, 124 void FillGraphicsImageViews(std::span<const u32> indices,
122 std::span<ImageViewId> image_view_ids); 125 std::span<ImageViewId> image_view_ids);
@@ -527,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
527} 530}
528 531
529template <class P> 532template <class P>
533void TextureCache<P>::MarkModification(ImageId id) noexcept {
534 MarkModification(slot_images[id]);
535}
536
537template <class P>
530void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, 538void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
531 std::span<ImageViewId> image_view_ids) { 539 std::span<ImageViewId> image_view_ids) {
532 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); 540 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
@@ -540,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
540 548
541template <class P> 549template <class P>
542typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 550typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
543 [[unlikely]] if (index > graphics_sampler_table.Limit()) { 551 if (index > graphics_sampler_table.Limit()) {
544 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); 552 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
545 return &slot_samplers[NULL_SAMPLER_ID]; 553 return &slot_samplers[NULL_SAMPLER_ID];
546 } 554 }
547 const auto [descriptor, is_new] = graphics_sampler_table.Read(index); 555 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
548 SamplerId& id = graphics_sampler_ids[index]; 556 SamplerId& id = graphics_sampler_ids[index];
549 [[unlikely]] if (is_new) { 557 if (is_new) {
550 id = FindSampler(descriptor); 558 id = FindSampler(descriptor);
551 } 559 }
552 return &slot_samplers[id]; 560 return &slot_samplers[id];
@@ -554,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
554 562
555template <class P> 563template <class P>
556typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { 564typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
557 [[unlikely]] if (index > compute_sampler_table.Limit()) { 565 if (index > compute_sampler_table.Limit()) {
558 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); 566 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
559 return &slot_samplers[NULL_SAMPLER_ID]; 567 return &slot_samplers[NULL_SAMPLER_ID];
560 } 568 }
561 const auto [descriptor, is_new] = compute_sampler_table.Read(index); 569 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
562 SamplerId& id = compute_sampler_ids[index]; 570 SamplerId& id = compute_sampler_ids[index];
563 [[unlikely]] if (is_new) { 571 if (is_new) {
564 id = FindSampler(descriptor); 572 id = FindSampler(descriptor);
565 } 573 }
566 return &slot_samplers[id]; 574 return &slot_samplers[id];
@@ -661,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
661 std::span<ImageViewId> cached_image_view_ids, 669 std::span<ImageViewId> cached_image_view_ids,
662 u32 index) { 670 u32 index) {
663 if (index > table.Limit()) { 671 if (index > table.Limit()) {
664 LOG_ERROR(HW_GPU, "Invalid image view index={}", index); 672 LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
665 return NULL_IMAGE_VIEW_ID; 673 return NULL_IMAGE_VIEW_ID;
666 } 674 }
667 const auto [descriptor, is_new] = table.Read(index); 675 const auto [descriptor, is_new] = table.Read(index);
@@ -968,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
968 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 976 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
969 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 977 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
970 image.UploadMemory(staging, copies); 978 image.UploadMemory(staging, copies);
971 } else if (image.info.type == ImageType::Buffer) {
972 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
973 image.UploadMemory(staging, copies);
974 } else { 979 } else {
975 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 980 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
976 image.UploadMemory(staging, copies); 981 image.UploadMemory(staging, copies);
@@ -993,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
993template <class P> 998template <class P>
994ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { 999ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
995 const ImageInfo info(config); 1000 const ImageInfo info(config);
996 const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; 1001 if (info.type == ImageType::Buffer) {
1002 const ImageViewInfo view_info(config, 0);
1003 return slot_image_views.insert(runtime, info, view_info, config.Address());
1004 }
1005 const u32 layer_offset = config.BaseLayer() * info.layer_stride;
1006 const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
997 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); 1007 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
998 if (!image_id) { 1008 if (!image_id) {
999 return NULL_IMAGE_VIEW_ID; 1009 return NULL_IMAGE_VIEW_ID;
@@ -1801,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
1801 return; 1811 return;
1802 } 1812 }
1803 const ImageViewBase& image_view = slot_image_views[image_view_id]; 1813 const ImageViewBase& image_view = slot_image_views[image_view_id];
1814 if (image_view.IsBuffer()) {
1815 return;
1816 }
1804 PrepareImage(image_view.image_id, is_modification, invalidate); 1817 PrepareImage(image_view.image_id, is_modification, invalidate);
1805} 1818}
1806 1819
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c1d14335e..1a9399455 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -154,6 +154,15 @@ union TextureHandle {
154}; 154};
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
158 if (via_header_index) {
159 return {raw, raw};
160 } else {
161 const Tegra::Texture::TextureHandle handle{raw};
162        return {handle.tic_id, handle.tsc_id};
163 }
164}
165
157struct TICEntry { 166struct TICEntry {
158 union { 167 union {
159 struct { 168 struct {
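The new TexturePair helper centralizes how a raw texture handle is interpreted: when indices come via the header index, the raw value addresses both the TIC and TSC tables directly; otherwise the packed handle supplies separate descriptor ids. A hedged usage sketch, assuming the helper lives in Tegra::Texture alongside the rest of texture.h:

#include <utility>

#include "common/common_types.h"
#include "video_core/textures/texture.h"

std::pair<u32, u32> SplitHandle(u32 raw_handle, bool via_header_index) {
    // first: index into the image descriptor table (TIC)
    // second: index into the sampler descriptor table (TSC)
    return Tegra::Texture::TexturePair(raw_handle, via_header_index);
}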
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..ba26ac3f1
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <vector>
8
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "shader_recompiler/shader_info.h"
12#include "video_core/transform_feedback.h"
13
14namespace VideoCommon {
15
16std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
17 const TransformFeedbackState& state) {
18 static constexpr std::array VECTORS{
19 28, // gl_Position
20 32, // Generic 0
21 36, // Generic 1
22 40, // Generic 2
23 44, // Generic 3
24 48, // Generic 4
25 52, // Generic 5
26 56, // Generic 6
27 60, // Generic 7
28 64, // Generic 8
29 68, // Generic 9
30 72, // Generic 10
31 76, // Generic 11
32 80, // Generic 12
33 84, // Generic 13
34 88, // Generic 14
35 92, // Generic 15
36 96, // Generic 16
37 100, // Generic 17
38 104, // Generic 18
39 108, // Generic 19
40 112, // Generic 20
41 116, // Generic 21
42 120, // Generic 22
43 124, // Generic 23
44 128, // Generic 24
45 132, // Generic 25
46 136, // Generic 26
47 140, // Generic 27
48 144, // Generic 28
49 148, // Generic 29
50 152, // Generic 30
51 156, // Generic 31
52 160, // gl_FrontColor
53 164, // gl_FrontSecondaryColor
54 160, // gl_BackColor
55 164, // gl_BackSecondaryColor
56 192, // gl_TexCoord[0]
57 196, // gl_TexCoord[1]
58 200, // gl_TexCoord[2]
59 204, // gl_TexCoord[3]
60 208, // gl_TexCoord[4]
61 212, // gl_TexCoord[5]
62 216, // gl_TexCoord[6]
63 220, // gl_TexCoord[7]
64 };
65 std::vector<Shader::TransformFeedbackVarying> xfb(256);
66 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
67 const auto& locations = state.varyings[buffer];
68 const auto& layout = state.layouts[buffer];
69 const u32 varying_count = layout.varying_count;
70 u32 highest = 0;
71 for (u32 offset = 0; offset < varying_count; ++offset) {
72 const u32 base_offset = offset;
73 const u8 location = locations[offset];
74
75 UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
76 Shader::TransformFeedbackVarying varying{
77 .buffer = static_cast<u32>(buffer),
78 .stride = layout.stride,
79 .offset = offset * 4,
80 .components = 1,
81 };
82 if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
83 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
84
85 const u8 base_index = location / 4;
86 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
87 ++offset;
88 ++varying.components;
89 }
90 }
91 xfb[location] = varying;
92 highest = std::max(highest, (base_offset + varying.components) * 4);
93 }
94 UNIMPLEMENTED_IF(highest != layout.stride);
95 }
96 return xfb;
97}
98
99} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..8f6946d65
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11#include "shader_recompiler/runtime_info.h"
12#include "video_core/engines/maxwell_3d.h"
13
14namespace VideoCommon {
15
16struct TransformFeedbackState {
17 struct Layout {
18 u32 stream;
19 u32 varying_count;
20 u32 stride;
21 };
22 std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts;
23 std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
24 varyings;
25};
26
27std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
28 const TransformFeedbackState& state);
29
30} // namespace VideoCommon
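MakeTransformFeedbackVaryings walks each buffer's varying locations and, when consecutive locations share the same 4-aligned base (the components of gl_Position at locations 28 through 31, for example), folds them into a single varying with components greater than one. A hedged usage sketch showing the expected output for a buffer that records only gl_Position.xyzw (values are inferred from the loop above, not taken from a capture):

#include <vector>

#include "video_core/transform_feedback.h"

std::vector<Shader::TransformFeedbackVarying> PositionOnlyExample() {
    VideoCommon::TransformFeedbackState state{};
    state.layouts[0] = {.stream = 0, .varying_count = 4, .stride = 16};
    state.varyings[0][0] = 28; // gl_Position.x
    state.varyings[0][1] = 29; // gl_Position.y
    state.varyings[0][2] = 30; // gl_Position.z
    state.varyings[0][3] = 31; // gl_Position.w
    // The returned vector has a single populated entry at index 28:
    // buffer = 0, stride = 16, offset = 0, components = 4.
    return VideoCommon::MakeTransformFeedbackVaryings(state);
}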
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 758c038ba..fdd1a5081 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() {
73 } 73 }
74} 74}
75 75
76void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { 76void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const {
77 if (!initialized) { 77 if (!initialized) {
78 return; 78 return;
79 } 79 }
80 80 std::vector<u32> spirv_copy(spirv.begin(), spirv.end());
81 std::vector<u32> spirv_copy = spirv;
82 GFSDK_Aftermath_SpirvCode shader; 81 GFSDK_Aftermath_SpirvCode shader;
83 shader.pData = spirv_copy.data(); 82 shader.pData = spirv_copy.data();
84 shader.size = static_cast<u32>(spirv_copy.size() * 4); 83 shader.size = static_cast<u32>(spirv_copy.size() * 4);
@@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
100 LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); 99 LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
101 return; 100 return;
102 } 101 }
103 if (file.Write(spirv) != spirv.size()) { 102 if (file.WriteSpan(spirv) != spirv.size()) {
104 LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); 103 LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
105 return; 104 return;
106 } 105 }
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index 4fe2b14d9..eae1891dd 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -6,6 +6,7 @@
6 6
7#include <filesystem> 7#include <filesystem>
8#include <mutex> 8#include <mutex>
9#include <span>
9#include <string> 10#include <string>
10#include <vector> 11#include <vector>
11 12
@@ -33,7 +34,7 @@ public:
33 NsightAftermathTracker(NsightAftermathTracker&&) = delete; 34 NsightAftermathTracker(NsightAftermathTracker&&) = delete;
34 NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; 35 NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
35 36
36 void SaveShader(const std::vector<u32>& spirv) const; 37 void SaveShader(std::span<const u32> spirv) const;
37 38
38private: 39private:
39#ifdef HAS_NSIGHT_AFTERMATH 40#ifdef HAS_NSIGHT_AFTERMATH
@@ -61,21 +62,21 @@ private:
61 bool initialized = false; 62 bool initialized = false;
62 63
63 Common::DynamicLibrary dl; 64 Common::DynamicLibrary dl;
64 PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; 65 PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{};
65 PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; 66 PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{};
66 PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; 67 PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{};
67 PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; 68 PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{};
68 PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; 69 PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{};
69 PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; 70 PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{};
70 PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; 71 PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{};
71 PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; 72 PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{};
72#endif 73#endif
73}; 74};
74 75
75#ifndef HAS_NSIGHT_AFTERMATH 76#ifndef HAS_NSIGHT_AFTERMATH
76inline NsightAftermathTracker::NsightAftermathTracker() = default; 77inline NsightAftermathTracker::NsightAftermathTracker() = default;
77inline NsightAftermathTracker::~NsightAftermathTracker() = default; 78inline NsightAftermathTracker::~NsightAftermathTracker() = default;
78inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} 79inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {}
79#endif 80#endif
80 81
81} // namespace Vulkan 82} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f214510da..44afdc1cd 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <bitset> 6#include <bitset>
6#include <chrono> 7#include <chrono>
7#include <optional> 8#include <optional>
@@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
33}; 34};
34} // namespace Alternatives 35} // namespace Alternatives
35 36
37enum class NvidiaArchitecture {
38 AmpereOrNewer,
39 Turing,
40 VoltaOrOlder,
41};
42
36constexpr std::array REQUIRED_EXTENSIONS{ 43constexpr std::array REQUIRED_EXTENSIONS{
37 VK_KHR_MAINTENANCE1_EXTENSION_NAME, 44 VK_KHR_MAINTENANCE1_EXTENSION_NAME,
38 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, 45 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
@@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
43 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, 50 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
44 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, 51 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
45 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, 52 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
53 VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
54 VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
46 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 55 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
47 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 56 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
48 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 57 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
49 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, 58 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
50 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, 59 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
60 VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
51#ifdef _WIN32 61#ifdef _WIN32
52 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, 62 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
53#endif 63#endif
@@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
112 VK_FORMAT_R16G16_SFLOAT, 122 VK_FORMAT_R16G16_SFLOAT,
113 VK_FORMAT_R16G16_SINT, 123 VK_FORMAT_R16G16_SINT,
114 VK_FORMAT_R16_UNORM, 124 VK_FORMAT_R16_UNORM,
125 VK_FORMAT_R16_SNORM,
115 VK_FORMAT_R16_UINT, 126 VK_FORMAT_R16_UINT,
116 VK_FORMAT_R8G8B8A8_SRGB, 127 VK_FORMAT_R8G8B8A8_SRGB,
117 VK_FORMAT_R8G8_UNORM, 128 VK_FORMAT_R8G8_UNORM,
@@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
191 return format_properties; 202 return format_properties;
192} 203}
193 204
205std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
206 const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
207 std::vector<std::string> supported_extensions;
208 supported_extensions.reserve(extensions.size());
209 for (const auto& extension : extensions) {
210 supported_extensions.emplace_back(extension.extensionName);
211 }
212 return supported_extensions;
213}
214
215NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
216 std::span<const std::string> exts) {
217 if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) {
218 VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
219 shading_rate_props.sType =
220 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
221 VkPhysicalDeviceProperties2KHR physical_properties{};
222 physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
223 physical_properties.pNext = &shading_rate_props;
224 physical.GetProperties2KHR(physical_properties);
225 if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
226 // Only Ampere and newer support this feature
227 return NvidiaArchitecture::AmpereOrNewer;
228 }
229 }
230 if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) {
231 return NvidiaArchitecture::Turing;
232 }
233 return NvidiaArchitecture::VoltaOrOlder;
234}
194} // Anonymous namespace 235} // Anonymous namespace
195 236
196Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, 237Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
197 const vk::InstanceDispatch& dld_) 238 const vk::InstanceDispatch& dld_)
198 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 239 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
199 format_properties{GetFormatProperties(physical)} { 240 supported_extensions{GetSupportedExtensions(physical)},
241 format_properties(GetFormatProperties(physical)) {
200 CheckSuitability(surface != nullptr); 242 CheckSuitability(surface != nullptr);
201 SetupFamilies(surface); 243 SetupFamilies(surface);
202 SetupFeatures(); 244 SetupFeatures();
245 SetupProperties();
203 246
204 const auto queue_cis = GetDeviceQueueCreateInfos(); 247 const auto queue_cis = GetDeviceQueueCreateInfos();
205 const std::vector extensions = LoadExtensions(surface != nullptr); 248 const std::vector extensions = LoadExtensions(surface != nullptr);
@@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
214 .independentBlend = true, 257 .independentBlend = true,
215 .geometryShader = true, 258 .geometryShader = true,
216 .tessellationShader = true, 259 .tessellationShader = true,
217 .sampleRateShading = false, 260 .sampleRateShading = true,
218 .dualSrcBlend = false, 261 .dualSrcBlend = true,
219 .logicOp = false, 262 .logicOp = false,
220 .multiDrawIndirect = false, 263 .multiDrawIndirect = false,
221 .drawIndirectFirstInstance = false, 264 .drawIndirectFirstInstance = false,
222 .depthClamp = true, 265 .depthClamp = true,
223 .depthBiasClamp = true, 266 .depthBiasClamp = true,
224 .fillModeNonSolid = false, 267 .fillModeNonSolid = true,
225 .depthBounds = false, 268 .depthBounds = is_depth_bounds_supported,
226 .wideLines = false, 269 .wideLines = true,
227 .largePoints = true, 270 .largePoints = true,
228 .alphaToOne = false, 271 .alphaToOne = false,
229 .multiViewport = true, 272 .multiViewport = true,
@@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
245 .shaderSampledImageArrayDynamicIndexing = false, 288 .shaderSampledImageArrayDynamicIndexing = false,
246 .shaderStorageBufferArrayDynamicIndexing = false, 289 .shaderStorageBufferArrayDynamicIndexing = false,
247 .shaderStorageImageArrayDynamicIndexing = false, 290 .shaderStorageImageArrayDynamicIndexing = false,
248 .shaderClipDistance = false, 291 .shaderClipDistance = true,
249 .shaderCullDistance = false, 292 .shaderCullDistance = true,
250 .shaderFloat64 = false, 293 .shaderFloat64 = is_shader_float64_supported,
251 .shaderInt64 = false, 294 .shaderInt64 = is_shader_int64_supported,
252 .shaderInt16 = false, 295 .shaderInt16 = is_shader_int16_supported,
253 .shaderResourceResidency = false, 296 .shaderResourceResidency = false,
254 .shaderResourceMinLod = false, 297 .shaderResourceMinLod = false,
255 .sparseBinding = false, 298 .sparseBinding = false,
@@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
278 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ 321 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
279 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, 322 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
280 .pNext = nullptr, 323 .pNext = nullptr,
281 .storageBuffer16BitAccess = false, 324 .storageBuffer16BitAccess = true,
282 .uniformAndStorageBuffer16BitAccess = true, 325 .uniformAndStorageBuffer16BitAccess = true,
283 .storagePushConstant16 = false, 326 .storagePushConstant16 = false,
284 .storageInputOutput16 = false, 327 .storageInputOutput16 = false,
@@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
310 }; 353 };
311 SetNext(next, host_query_reset); 354 SetNext(next, host_query_reset);
312 355
356 VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{
357 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR,
358 .pNext = nullptr,
359 .variablePointersStorageBuffer = VK_TRUE,
360 .variablePointers = VK_TRUE,
361 };
362 SetNext(next, variable_pointers);
363
364 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
365 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
366 .pNext = nullptr,
367 .shaderDemoteToHelperInvocation = true,
368 };
369 SetNext(next, demote);
370
313 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; 371 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
314 if (is_float16_supported) { 372 if (is_float16_supported) {
315 float16_int8 = { 373 float16_int8 = {
@@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
327 LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); 385 LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
328 } 386 }
329 387
388 if (!nv_viewport_array2) {
389 LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks");
390 }
391
392 if (!nv_geometry_shader_passthrough) {
393 LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
394 }
395
330 VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; 396 VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
331 if (khr_uniform_buffer_standard_layout) { 397 if (khr_uniform_buffer_standard_layout) {
332 std430_layout = { 398 std430_layout = {
@@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
389 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 455 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
390 } 456 }
391 457
458 VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
459 if (ext_line_rasterization) {
460 line_raster = {
461 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT,
462 .pNext = nullptr,
463 .rectangularLines = VK_TRUE,
464 .bresenhamLines = VK_FALSE,
465 .smoothLines = VK_TRUE,
466 .stippledRectangularLines = VK_FALSE,
467 .stippledBresenhamLines = VK_FALSE,
468 .stippledSmoothLines = VK_FALSE,
469 };
470 SetNext(next, line_raster);
471 } else {
472 LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines");
473 }
474
475 if (!ext_conservative_rasterization) {
476 LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization");
477 }
478
479 VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
480 if (ext_provoking_vertex) {
481 provoking_vertex = {
482 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
483 .pNext = nullptr,
484 .provokingVertexLast = VK_TRUE,
485 .transformFeedbackPreservesProvokingVertex = VK_TRUE,
486 };
487 SetNext(next, provoking_vertex);
488 } else {
489 LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last");
490 }
491
492 VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic;
493 if (ext_vertex_input_dynamic_state) {
494 vertex_input_dynamic = {
495 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT,
496 .pNext = nullptr,
497 .vertexInputDynamicState = VK_TRUE,
498 };
499 SetNext(next, vertex_input_dynamic);
500 } else {
501 LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
502 }
503
504 VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64;
505 if (ext_shader_atomic_int64) {
506 atomic_int64 = {
507 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR,
508 .pNext = nullptr,
509 .shaderBufferInt64Atomics = VK_TRUE,
510 .shaderSharedInt64Atomics = VK_TRUE,
511 };
512 SetNext(next, atomic_int64);
513 }
514
515 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
516 if (khr_workgroup_memory_explicit_layout) {
517 workgroup_layout = {
518 .sType =
519 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
520 .pNext = nullptr,
521 .workgroupMemoryExplicitLayout = VK_TRUE,
522 .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
523 .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
524 .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
525 };
526 SetNext(next, workgroup_layout);
527 }
528
392 if (!ext_depth_range_unrestricted) { 529 if (!ext_depth_range_unrestricted) {
393 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 530 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
394 } 531 }
395 532
396 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; 533 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
397 if (nv_device_diagnostics_config) { 534 if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) {
398 nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); 535 nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
399 536
400 diagnostics_nv = { 537 diagnostics_nv = {
@@ -412,11 +549,33 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
412 CollectTelemetryParameters(); 549 CollectTelemetryParameters();
413 CollectToolingInfo(); 550 CollectToolingInfo();
414 551
552 if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
553 const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
554 switch (arch) {
555 case NvidiaArchitecture::AmpereOrNewer:
556 LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
557 is_float16_supported = false;
558 break;
559 case NvidiaArchitecture::Turing:
560 break;
561 case NvidiaArchitecture::VoltaOrOlder:
562 LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
563 khr_push_descriptor = false;
564 break;
565 }
566 }
415 if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { 567 if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
416 LOG_WARNING( 568 // Mask driver version variant
417 Render_Vulkan, 569 const u32 version = (properties.driverVersion << 3) >> 3;
418 "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); 570 if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
419 ext_extended_dynamic_state = false; 571 LOG_WARNING(Render_Vulkan,
572 "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
573 ext_extended_dynamic_state = false;
574 }
575 }
576 if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
577 LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
578 ext_vertex_input_dynamic_state = false;
420 } 579 }
421 if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { 580 if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
422 // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. 581 // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
@@ -426,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
426 585
427 graphics_queue = logical.GetQueue(graphics_family); 586 graphics_queue = logical.GetQueue(graphics_family);
428 present_queue = logical.GetQueue(present_family); 587 present_queue = logical.GetQueue(present_family);
429
430 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
431} 588}
432 589
433Device::~Device() = default; 590Device::~Device() = default;
@@ -471,7 +628,7 @@ void Device::ReportLoss() const {
471 std::this_thread::sleep_for(std::chrono::seconds{15}); 628 std::this_thread::sleep_for(std::chrono::seconds{15});
472} 629}
473 630
474void Device::SaveShader(const std::vector<u32>& spirv) const { 631void Device::SaveShader(std::span<const u32> spirv) const {
475 if (nsight_aftermath_tracker) { 632 if (nsight_aftermath_tracker) {
476 nsight_aftermath_tracker->SaveShader(spirv); 633 nsight_aftermath_tracker->SaveShader(spirv);
477 } 634 }
@@ -597,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const {
597 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 754 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
598 } 755 }
599 } 756 }
757 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
758 demote.sType =
759 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
760 demote.pNext = nullptr;
761
762 VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{};
763 variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR;
764 variable_pointers.pNext = &demote;
765
600 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; 766 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
601 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; 767 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
768 robustness2.pNext = &variable_pointers;
602 769
603 VkPhysicalDeviceFeatures2 features2{}; 770 VkPhysicalDeviceFeatures2KHR features2{};
604 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 771 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
605 features2.pNext = &robustness2; 772 features2.pNext = &robustness2;
606 773
@@ -610,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const {
610 const std::array feature_report{ 777 const std::array feature_report{
611 std::make_pair(features.robustBufferAccess, "robustBufferAccess"), 778 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
612 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 779 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
613 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
614 std::make_pair(features.imageCubeArray, "imageCubeArray"), 780 std::make_pair(features.imageCubeArray, "imageCubeArray"),
615 std::make_pair(features.independentBlend, "independentBlend"), 781 std::make_pair(features.independentBlend, "independentBlend"),
616 std::make_pair(features.depthClamp, "depthClamp"), 782 std::make_pair(features.depthClamp, "depthClamp"),
@@ -618,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const {
618 std::make_pair(features.largePoints, "largePoints"), 784 std::make_pair(features.largePoints, "largePoints"),
619 std::make_pair(features.multiViewport, "multiViewport"), 785 std::make_pair(features.multiViewport, "multiViewport"),
620 std::make_pair(features.depthBiasClamp, "depthBiasClamp"), 786 std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
787 std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
788 std::make_pair(features.wideLines, "wideLines"),
621 std::make_pair(features.geometryShader, "geometryShader"), 789 std::make_pair(features.geometryShader, "geometryShader"),
622 std::make_pair(features.tessellationShader, "tessellationShader"), 790 std::make_pair(features.tessellationShader, "tessellationShader"),
791 std::make_pair(features.sampleRateShading, "sampleRateShading"),
792 std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
623 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), 793 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
624 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), 794 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
625 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 795 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
626 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 796 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
627 "shaderStorageImageWriteWithoutFormat"), 797 "shaderStorageImageWriteWithoutFormat"),
798 std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
799 std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
800 std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
801 std::make_pair(variable_pointers.variablePointers, "variablePointers"),
802 std::make_pair(variable_pointers.variablePointersStorageBuffer,
803 "variablePointersStorageBuffer"),
628 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), 804 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
629 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), 805 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
630 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), 806 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
@@ -647,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
647 } 823 }
648 824
649 bool has_khr_shader_float16_int8{}; 825 bool has_khr_shader_float16_int8{};
826 bool has_khr_workgroup_memory_explicit_layout{};
650 bool has_ext_subgroup_size_control{}; 827 bool has_ext_subgroup_size_control{};
651 bool has_ext_transform_feedback{}; 828 bool has_ext_transform_feedback{};
652 bool has_ext_custom_border_color{}; 829 bool has_ext_custom_border_color{};
653 bool has_ext_extended_dynamic_state{}; 830 bool has_ext_extended_dynamic_state{};
654 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 831 bool has_ext_shader_atomic_int64{};
832 bool has_ext_provoking_vertex{};
833 bool has_ext_vertex_input_dynamic_state{};
834 bool has_ext_line_rasterization{};
835 for (const std::string& extension : supported_extensions) {
655 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 836 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
656 bool push) { 837 bool push) {
657 if (extension.extensionName != std::string_view(name)) { 838 if (extension != name) {
658 return; 839 return;
659 } 840 }
660 if (push) { 841 if (push) {
@@ -665,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
665 } 846 }
666 }; 847 };
667 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); 848 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
849 test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true);
850 test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME,
851 true);
668 test(khr_uniform_buffer_standard_layout, 852 test(khr_uniform_buffer_standard_layout,
669 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 853 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
854 test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
855 test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
670 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 856 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
671 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 857 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
672 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 858 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@@ -675,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
675 true); 861 true);
676 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); 862 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
677 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); 863 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
864 test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
865 true);
678 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); 866 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
679 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 867 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
680 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 868 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
681 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 869 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
682 if (Settings::values.renderer_debug) { 870 test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
871 test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME,
872 false);
873 test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
874 test(has_khr_workgroup_memory_explicit_layout,
875 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
876 test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
877 if (Settings::values.enable_nsight_aftermath) {
683 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, 878 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
684 true); 879 true);
685 } 880 }
686 } 881 }
687 VkPhysicalDeviceFeatures2KHR features; 882 VkPhysicalDeviceFeatures2KHR features{};
688 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; 883 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
689 884
690 VkPhysicalDeviceProperties2KHR physical_properties; 885 VkPhysicalDeviceProperties2KHR physical_properties;
@@ -722,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
722 subgroup_properties.maxSubgroupSize >= GuestWarpSize) { 917 subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
723 extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); 918 extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
724 guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; 919 guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
920 ext_subgroup_size_control = true;
725 } 921 }
726 } else { 922 } else {
727 is_warp_potentially_bigger = true; 923 is_warp_potentially_bigger = true;
728 } 924 }
925 if (has_ext_provoking_vertex) {
926 VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
927 provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
928 provoking_vertex.pNext = nullptr;
929 features.pNext = &provoking_vertex;
930 physical.GetFeatures2KHR(features);
931
932 if (provoking_vertex.provokingVertexLast &&
933 provoking_vertex.transformFeedbackPreservesProvokingVertex) {
934 extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
935 ext_provoking_vertex = true;
936 }
937 }
938 if (has_ext_vertex_input_dynamic_state) {
939 VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input;
940 vertex_input.sType =
941 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
942 vertex_input.pNext = nullptr;
943 features.pNext = &vertex_input;
944 physical.GetFeatures2KHR(features);
945
946 if (vertex_input.vertexInputDynamicState) {
947 extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
948 ext_vertex_input_dynamic_state = true;
949 }
950 }
951 if (has_ext_shader_atomic_int64) {
952 VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
 953         atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES;
954 atomic_int64.pNext = nullptr;
955 features.pNext = &atomic_int64;
956 physical.GetFeatures2KHR(features);
957
958 if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
959 extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
960 ext_shader_atomic_int64 = true;
961 }
962 }
729 if (has_ext_transform_feedback) { 963 if (has_ext_transform_feedback) {
730 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; 964 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
731 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; 965 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -760,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
760 } 994 }
761 } 995 }
762 if (has_ext_extended_dynamic_state) { 996 if (has_ext_extended_dynamic_state) {
763 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; 997 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state;
764 dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; 998 extended_dynamic_state.sType =
765 dynamic_state.pNext = nullptr; 999 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
766 features.pNext = &dynamic_state; 1000 extended_dynamic_state.pNext = nullptr;
1001 features.pNext = &extended_dynamic_state;
767 physical.GetFeatures2KHR(features); 1002 physical.GetFeatures2KHR(features);
768 1003
769 if (dynamic_state.extendedDynamicState) { 1004 if (extended_dynamic_state.extendedDynamicState) {
770 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); 1005 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
771 ext_extended_dynamic_state = true; 1006 ext_extended_dynamic_state = true;
772 } 1007 }
773 } 1008 }
1009 if (has_ext_line_rasterization) {
1010 VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
1011 line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT;
1012 line_raster.pNext = nullptr;
1013 features.pNext = &line_raster;
1014 physical.GetFeatures2KHR(features);
1015 if (line_raster.rectangularLines && line_raster.smoothLines) {
1016 extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
1017 ext_line_rasterization = true;
1018 }
1019 }
1020 if (has_khr_workgroup_memory_explicit_layout) {
1021 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
1022 layout.sType =
1023 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
1024 layout.pNext = nullptr;
1025 features.pNext = &layout;
1026 physical.GetFeatures2KHR(features);
1027
1028 if (layout.workgroupMemoryExplicitLayout &&
1029 layout.workgroupMemoryExplicitLayout8BitAccess &&
1030 layout.workgroupMemoryExplicitLayout16BitAccess &&
1031 layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
1032 extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
1033 khr_workgroup_memory_explicit_layout = true;
1034 }
1035 }
1036 if (khr_push_descriptor) {
1037 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
1038 push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
1039 push_descriptor.pNext = nullptr;
1040
1041 physical_properties.pNext = &push_descriptor;
1042 physical.GetProperties2KHR(physical_properties);
1043
1044 max_push_descriptors = push_descriptor.maxPushDescriptors;
1045 }
774 return extensions; 1046 return extensions;
775} 1047}
776 1048
@@ -806,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
806} 1078}
807 1079
808void Device::SetupFeatures() { 1080void Device::SetupFeatures() {
809 const auto supported_features{physical.GetFeatures()}; 1081 const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
810 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; 1082 is_depth_bounds_supported = features.depthBounds;
811 is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; 1083 is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat;
1084 is_shader_float64_supported = features.shaderFloat64;
1085 is_shader_int64_supported = features.shaderInt64;
1086 is_shader_int16_supported = features.shaderInt16;
1087 is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
812 is_blit_depth_stencil_supported = TestDepthStencilBlits(); 1088 is_blit_depth_stencil_supported = TestDepthStencilBlits();
813 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); 1089 is_optimal_astc_supported = IsOptimalAstcSupported(features);
1090}
1091
1092void Device::SetupProperties() {
1093 float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
1094
1095 VkPhysicalDeviceProperties2KHR properties2{};
1096 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
1097 properties2.pNext = &float_controls;
1098
1099 physical.GetProperties2KHR(properties2);
814} 1100}
815 1101
816void Device::CollectTelemetryParameters() { 1102void Device::CollectTelemetryParameters() {
@@ -832,12 +1118,6 @@ void Device::CollectTelemetryParameters() {
832 1118
833 driver_id = driver.driverID; 1119 driver_id = driver.driverID;
834 vendor_name = driver.driverName; 1120 vendor_name = driver.driverName;
835
836 const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
837 reported_extensions.reserve(std::size(extensions));
838 for (const auto& extension : extensions) {
839 reported_extensions.emplace_back(extension.extensionName);
840 }
841} 1121}
842 1122
843void Device::CollectPhysicalMemoryInfo() { 1123void Device::CollectPhysicalMemoryInfo() {
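Each of the new extension checks added to LoadExtensions() above follows the same VkPhysicalDeviceFeatures2 pattern: chain the extension's feature struct through pNext, query the physical device, and only push the extension name (and set the matching member flag) when every required feature bit is reported. A minimal stand-alone sketch of that pattern, using plain Vulkan entry points rather than yuzu's vk:: wrappers and a hypothetical helper name:

    // Sketch: the pNext feature-query pattern used by LoadExtensions (hypothetical helper).
    #include <vector>
    #include <vulkan/vulkan.h>

    bool TryEnableProvokingVertex(VkPhysicalDevice gpu, std::vector<const char*>& extensions) {
        VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex{};
        provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;

        VkPhysicalDeviceFeatures2 features{};
        features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features.pNext = &provoking_vertex; // chain the extension struct before querying
        vkGetPhysicalDeviceFeatures2(gpu, &features);

        // Only advertise the extension when the feature bits the renderer needs are present.
        if (provoking_vertex.provokingVertexLast == VK_TRUE &&
            provoking_vertex.transformFeedbackPreservesProvokingVertex == VK_TRUE) {
            extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
            return true;
        }
        return false;
    }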
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 96c0f8c60..df394e384 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
7#include <string> 8#include <string>
8#include <string_view> 9#include <string_view>
9#include <unordered_map> 10#include <unordered_map>
@@ -43,7 +44,7 @@ public:
43 void ReportLoss() const; 44 void ReportLoss() const;
44 45
45 /// Reports a shader to Nsight Aftermath. 46 /// Reports a shader to Nsight Aftermath.
46 void SaveShader(const std::vector<u32>& spirv) const; 47 void SaveShader(std::span<const u32> spirv) const;
47 48
48 /// Returns the name of the VkDriverId reported from Vulkan. 49 /// Returns the name of the VkDriverId reported from Vulkan.
49 std::string GetDriverName() const; 50 std::string GetDriverName() const;
@@ -128,6 +129,11 @@ public:
128 return properties.limits.maxComputeSharedMemorySize; 129 return properties.limits.maxComputeSharedMemorySize;
129 } 130 }
130 131
132 /// Returns float control properties of the device.
133 const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
134 return float_controls;
135 }
136
131 /// Returns true if ASTC is natively supported. 137 /// Returns true if ASTC is natively supported.
132 bool IsOptimalAstcSupported() const { 138 bool IsOptimalAstcSupported() const {
133 return is_optimal_astc_supported; 139 return is_optimal_astc_supported;
@@ -148,11 +154,31 @@ public:
148 return guest_warp_stages & stage; 154 return guest_warp_stages & stage;
149 } 155 }
150 156
157 /// Returns the maximum number of push descriptors.
158 u32 MaxPushDescriptors() const {
159 return max_push_descriptors;
160 }
161
151 /// Returns true if formatless image load is supported. 162 /// Returns true if formatless image load is supported.
152 bool IsFormatlessImageLoadSupported() const { 163 bool IsFormatlessImageLoadSupported() const {
153 return is_formatless_image_load_supported; 164 return is_formatless_image_load_supported;
154 } 165 }
155 166
167 /// Returns true if shader int64 is supported.
168 bool IsShaderInt64Supported() const {
169 return is_shader_int64_supported;
170 }
171
172 /// Returns true if shader int16 is supported.
173 bool IsShaderInt16Supported() const {
174 return is_shader_int16_supported;
175 }
176
177     /// Returns true if depth bounds is supported.
178 bool IsDepthBoundsSupported() const {
179 return is_depth_bounds_supported;
180 }
181
156 /// Returns true when blitting from and to depth stencil images is supported. 182 /// Returns true when blitting from and to depth stencil images is supported.
157 bool IsBlitDepthStencilSupported() const { 183 bool IsBlitDepthStencilSupported() const {
158 return is_blit_depth_stencil_supported; 184 return is_blit_depth_stencil_supported;
@@ -163,11 +189,36 @@ public:
163 return nv_viewport_swizzle; 189 return nv_viewport_swizzle;
164 } 190 }
165 191
166 /// Returns true if the device supports VK_EXT_scalar_block_layout. 192 /// Returns true if the device supports VK_NV_viewport_array2.
193 bool IsNvViewportArray2Supported() const {
194 return nv_viewport_array2;
195 }
196
197 /// Returns true if the device supports VK_NV_geometry_shader_passthrough.
198 bool IsNvGeometryShaderPassthroughSupported() const {
199 return nv_geometry_shader_passthrough;
200 }
201
202 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
167 bool IsKhrUniformBufferStandardLayoutSupported() const { 203 bool IsKhrUniformBufferStandardLayoutSupported() const {
168 return khr_uniform_buffer_standard_layout; 204 return khr_uniform_buffer_standard_layout;
169 } 205 }
170 206
207 /// Returns true if the device supports VK_KHR_spirv_1_4.
208 bool IsKhrSpirv1_4Supported() const {
209 return khr_spirv_1_4;
210 }
211
212 /// Returns true if the device supports VK_KHR_push_descriptor.
213 bool IsKhrPushDescriptorSupported() const {
214 return khr_push_descriptor;
215 }
216
217 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
218 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
219 return khr_workgroup_memory_explicit_layout;
220 }
221
171 /// Returns true if the device supports VK_EXT_index_type_uint8. 222 /// Returns true if the device supports VK_EXT_index_type_uint8.
172 bool IsExtIndexTypeUint8Supported() const { 223 bool IsExtIndexTypeUint8Supported() const {
173 return ext_index_type_uint8; 224 return ext_index_type_uint8;
@@ -188,6 +239,11 @@ public:
188 return ext_shader_viewport_index_layer; 239 return ext_shader_viewport_index_layer;
189 } 240 }
190 241
242 /// Returns true if the device supports VK_EXT_subgroup_size_control.
243 bool IsExtSubgroupSizeControlSupported() const {
244 return ext_subgroup_size_control;
245 }
246
191 /// Returns true if the device supports VK_EXT_transform_feedback. 247 /// Returns true if the device supports VK_EXT_transform_feedback.
192 bool IsExtTransformFeedbackSupported() const { 248 bool IsExtTransformFeedbackSupported() const {
193 return ext_transform_feedback; 249 return ext_transform_feedback;
@@ -203,11 +259,36 @@ public:
203 return ext_extended_dynamic_state; 259 return ext_extended_dynamic_state;
204 } 260 }
205 261
262 /// Returns true if the device supports VK_EXT_line_rasterization.
263 bool IsExtLineRasterizationSupported() const {
264 return ext_line_rasterization;
265 }
266
267 /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
268 bool IsExtVertexInputDynamicStateSupported() const {
269 return ext_vertex_input_dynamic_state;
270 }
271
206 /// Returns true if the device supports VK_EXT_shader_stencil_export. 272 /// Returns true if the device supports VK_EXT_shader_stencil_export.
207 bool IsExtShaderStencilExportSupported() const { 273 bool IsExtShaderStencilExportSupported() const {
208 return ext_shader_stencil_export; 274 return ext_shader_stencil_export;
209 } 275 }
210 276
277 /// Returns true if the device supports VK_EXT_conservative_rasterization.
278 bool IsExtConservativeRasterizationSupported() const {
279 return ext_conservative_rasterization;
280 }
281
282 /// Returns true if the device supports VK_EXT_provoking_vertex.
283 bool IsExtProvokingVertexSupported() const {
284 return ext_provoking_vertex;
285 }
286
287 /// Returns true if the device supports VK_KHR_shader_atomic_int64.
288 bool IsExtShaderAtomicInt64Supported() const {
289 return ext_shader_atomic_int64;
290 }
291
211 /// Returns true when a known debugging tool is attached. 292 /// Returns true when a known debugging tool is attached.
212 bool HasDebuggingToolAttached() const { 293 bool HasDebuggingToolAttached() const {
213 return has_renderdoc || has_nsight_graphics; 294 return has_renderdoc || has_nsight_graphics;
@@ -220,12 +301,7 @@ public:
220 301
221 /// Returns the list of available extensions. 302 /// Returns the list of available extensions.
222 const std::vector<std::string>& GetAvailableExtensions() const { 303 const std::vector<std::string>& GetAvailableExtensions() const {
223 return reported_extensions; 304 return supported_extensions;
224 }
225
226 /// Returns true if the setting for async shader compilation is enabled.
227 bool UseAsynchronousShaders() const {
228 return use_asynchronous_shaders;
229 } 305 }
230 306
231 u64 GetDeviceLocalMemory() const { 307 u64 GetDeviceLocalMemory() const {
@@ -245,6 +321,9 @@ private:
245 /// Sets up device features. 321 /// Sets up device features.
246 void SetupFeatures(); 322 void SetupFeatures();
247 323
324 /// Sets up device properties.
325 void SetupProperties();
326
248 /// Collects telemetry information from the device. 327 /// Collects telemetry information from the device.
249 void CollectTelemetryParameters(); 328 void CollectTelemetryParameters();
250 329
@@ -267,46 +346,60 @@ private:
267 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, 346 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
268 FormatType format_type) const; 347 FormatType format_type) const;
269 348
270 VkInstance instance; ///< Vulkan instance. 349 VkInstance instance; ///< Vulkan instance.
271 vk::DeviceDispatch dld; ///< Device function pointers. 350 vk::DeviceDispatch dld; ///< Device function pointers.
272 vk::PhysicalDevice physical; ///< Physical device. 351 vk::PhysicalDevice physical; ///< Physical device.
273 VkPhysicalDeviceProperties properties; ///< Device properties. 352 VkPhysicalDeviceProperties properties; ///< Device properties.
274 vk::Device logical; ///< Logical device. 353 VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties.
275 vk::Queue graphics_queue; ///< Main graphics queue. 354 vk::Device logical; ///< Logical device.
276 vk::Queue present_queue; ///< Main present queue. 355 vk::Queue graphics_queue; ///< Main graphics queue.
277 u32 instance_version{}; ///< Vulkan instance version. 356 vk::Queue present_queue; ///< Main present queue.
 357 u32 instance_version{}; ///< Vulkan instance version.
278 u32 graphics_family{}; ///< Main graphics queue family index. 358 u32 graphics_family{}; ///< Main graphics queue family index.
279 u32 present_family{}; ///< Main present queue family index. 359 u32 present_family{}; ///< Main present queue family index.
280 VkDriverIdKHR driver_id{}; ///< Driver ID. 360 VkDriverIdKHR driver_id{}; ///< Driver ID.
281 VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 361 VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
282 u64 device_access_memory{}; ///< Total size of device local memory in bytes. 362 u64 device_access_memory{}; ///< Total size of device local memory in bytes.
 363 u32 max_push_descriptors{}; ///< Maximum number of push descriptors.
283 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 364 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
284 bool is_float16_supported{}; ///< Support for float16 arithmetics. 365 bool is_float16_supported{}; ///< Support for float16 arithmetics.
285 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 366 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
286 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 367 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
368 bool is_depth_bounds_supported{}; ///< Support for depth bounds.
369 bool is_shader_float64_supported{}; ///< Support for float64.
370 bool is_shader_int64_supported{}; ///< Support for int64.
371 bool is_shader_int16_supported{}; ///< Support for int16.
287 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. 372 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
288 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. 373 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
289 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 374 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
290 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 375 bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
291 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 376 bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
292 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 377 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
293 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 378 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
294 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 379 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
295 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. 380 bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor.
296 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 381 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
297 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 382 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
298 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 383 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
299 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 384 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
300 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 385 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
301 bool has_renderdoc{}; ///< Has RenderDoc attached 386 bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
302 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 387 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
303 388 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
304 // Asynchronous Graphics Pipeline setting 389 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
305 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline 390 bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization.
391 bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state.
392 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
393 bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
394 bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
395 bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
396 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
397 bool has_renderdoc{}; ///< Has RenderDoc attached
398 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
306 399
307 // Telemetry parameters 400 // Telemetry parameters
308 std::string vendor_name; ///< Device's driver name. 401 std::string vendor_name; ///< Device's driver name.
309 std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. 402 std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions.
310 403
311 /// Format properties dictionary. 404 /// Format properties dictionary.
312 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 405 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2aa0ffbe6..bbf0fccae 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
103 X(vkCmdFillBuffer); 103 X(vkCmdFillBuffer);
104 X(vkCmdPipelineBarrier); 104 X(vkCmdPipelineBarrier);
105 X(vkCmdPushConstants); 105 X(vkCmdPushConstants);
106 X(vkCmdPushDescriptorSetWithTemplateKHR);
106 X(vkCmdSetBlendConstants); 107 X(vkCmdSetBlendConstants);
107 X(vkCmdSetDepthBias); 108 X(vkCmdSetDepthBias);
108 X(vkCmdSetDepthBounds); 109 X(vkCmdSetDepthBounds);
@@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
120 X(vkCmdSetDepthTestEnableEXT); 121 X(vkCmdSetDepthTestEnableEXT);
121 X(vkCmdSetDepthWriteEnableEXT); 122 X(vkCmdSetDepthWriteEnableEXT);
122 X(vkCmdSetFrontFaceEXT); 123 X(vkCmdSetFrontFaceEXT);
124 X(vkCmdSetLineWidth);
123 X(vkCmdSetPrimitiveTopologyEXT); 125 X(vkCmdSetPrimitiveTopologyEXT);
124 X(vkCmdSetStencilOpEXT); 126 X(vkCmdSetStencilOpEXT);
125 X(vkCmdSetStencilTestEnableEXT); 127 X(vkCmdSetStencilTestEnableEXT);
128 X(vkCmdSetVertexInputEXT);
126 X(vkCmdResolveImage); 129 X(vkCmdResolveImage);
127 X(vkCreateBuffer); 130 X(vkCreateBuffer);
128 X(vkCreateBufferView); 131 X(vkCreateBufferView);
@@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept {
311 return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; 314 return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
312 case VkResult::VK_ERROR_UNKNOWN: 315 case VkResult::VK_ERROR_UNKNOWN:
313 return "VK_ERROR_UNKNOWN"; 316 return "VK_ERROR_UNKNOWN";
314 case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
315 return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
316 case VkResult::VK_THREAD_IDLE_KHR: 317 case VkResult::VK_THREAD_IDLE_KHR:
317 return "VK_THREAD_IDLE_KHR"; 318 return "VK_THREAD_IDLE_KHR";
318 case VkResult::VK_THREAD_DONE_KHR: 319 case VkResult::VK_THREAD_DONE_KHR:
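The X(...) entries added above are resolved by the wrapper's loader macro, which looks each symbol up by name at device-load time and leaves the pointer null when the extension is absent. A rough, self-contained sketch of that pattern with hypothetical names (the real DeviceDispatch/Load code has many more entries):

    // Sketch of the X-macro loading pattern (hypothetical struct and function names).
    #include <vulkan/vulkan.h>

    struct MyDeviceDispatch {
        PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr{}; // must already be loaded from the instance
        PFN_vkCmdSetLineWidth vkCmdSetLineWidth{};
        PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
        PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
    };

    void LoadDevicePointers(VkDevice device, MyDeviceDispatch& dld) noexcept {
    #define X(name) dld.name = reinterpret_cast<PFN_##name>(dld.vkGetDeviceProcAddr(device, #name))
        X(vkCmdSetLineWidth);
        X(vkCmdSetVertexInputEXT);                // stays null without VK_EXT_vertex_input_dynamic_state
        X(vkCmdPushDescriptorSetWithTemplateKHR); // stays null without VK_KHR_push_descriptor
    #undef X
    }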
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 3e36d356a..d76bb4324 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch {
193 PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; 193 PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
194 PFN_vkBindBufferMemory vkBindBufferMemory{}; 194 PFN_vkBindBufferMemory vkBindBufferMemory{};
195 PFN_vkBindImageMemory vkBindImageMemory{}; 195 PFN_vkBindImageMemory vkBindImageMemory{};
196 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
196 PFN_vkCmdBeginQuery vkCmdBeginQuery{}; 197 PFN_vkCmdBeginQuery vkCmdBeginQuery{};
197 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; 198 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
198 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; 199 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
199 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
200 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; 200 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
201 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; 201 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
202 PFN_vkCmdBindPipeline vkCmdBindPipeline{}; 202 PFN_vkCmdBindPipeline vkCmdBindPipeline{};
203 PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; 203 PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
204 PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; 204 PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
205 PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
205 PFN_vkCmdBlitImage vkCmdBlitImage{}; 206 PFN_vkCmdBlitImage vkCmdBlitImage{};
206 PFN_vkCmdClearAttachments vkCmdClearAttachments{}; 207 PFN_vkCmdClearAttachments vkCmdClearAttachments{};
207 PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; 208 PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
@@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch {
211 PFN_vkCmdDispatch vkCmdDispatch{}; 212 PFN_vkCmdDispatch vkCmdDispatch{};
212 PFN_vkCmdDraw vkCmdDraw{}; 213 PFN_vkCmdDraw vkCmdDraw{};
213 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; 214 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
215 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
214 PFN_vkCmdEndQuery vkCmdEndQuery{}; 216 PFN_vkCmdEndQuery vkCmdEndQuery{};
215 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; 217 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
216 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; 218 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
217 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
218 PFN_vkCmdFillBuffer vkCmdFillBuffer{}; 219 PFN_vkCmdFillBuffer vkCmdFillBuffer{};
219 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; 220 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
220 PFN_vkCmdPushConstants vkCmdPushConstants{}; 221 PFN_vkCmdPushConstants vkCmdPushConstants{};
222 PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
223 PFN_vkCmdResolveImage vkCmdResolveImage{};
221 PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; 224 PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
225 PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
222 PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; 226 PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
223 PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; 227 PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
224 PFN_vkCmdSetEvent vkCmdSetEvent{};
225 PFN_vkCmdSetScissor vkCmdSetScissor{};
226 PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
227 PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
228 PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
229 PFN_vkCmdSetViewport vkCmdSetViewport{};
230 PFN_vkCmdWaitEvents vkCmdWaitEvents{};
231 PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
232 PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
233 PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; 228 PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
234 PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; 229 PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
235 PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; 230 PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
236 PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; 231 PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
232 PFN_vkCmdSetEvent vkCmdSetEvent{};
237 PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; 233 PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
234 PFN_vkCmdSetLineWidth vkCmdSetLineWidth{};
238 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; 235 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
236 PFN_vkCmdSetScissor vkCmdSetScissor{};
237 PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
239 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; 238 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
239 PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
240 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; 240 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
241 PFN_vkCmdResolveImage vkCmdResolveImage{}; 241 PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
242 PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
243 PFN_vkCmdSetViewport vkCmdSetViewport{};
244 PFN_vkCmdWaitEvents vkCmdWaitEvents{};
242 PFN_vkCreateBuffer vkCreateBuffer{}; 245 PFN_vkCreateBuffer vkCreateBuffer{};
243 PFN_vkCreateBufferView vkCreateBufferView{}; 246 PFN_vkCreateBufferView vkCreateBufferView{};
244 PFN_vkCreateCommandPool vkCreateCommandPool{}; 247 PFN_vkCreateCommandPool vkCreateCommandPool{};
@@ -989,6 +992,12 @@ public:
989 dynamic_offsets.size(), dynamic_offsets.data()); 992 dynamic_offsets.size(), dynamic_offsets.data());
990 } 993 }
991 994
995 void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
996 VkPipelineLayout layout, u32 set,
997 const void* data) const noexcept {
998 dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);
999 }
1000
992 void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { 1001 void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
993 dld->vkCmdBindPipeline(handle, bind_point, pipeline); 1002 dld->vkCmdBindPipeline(handle, bind_point, pipeline);
994 } 1003 }
@@ -1190,6 +1199,10 @@ public:
1190 dld->vkCmdSetFrontFaceEXT(handle, front_face); 1199 dld->vkCmdSetFrontFaceEXT(handle, front_face);
1191 } 1200 }
1192 1201
1202 void SetLineWidth(float line_width) const noexcept {
1203 dld->vkCmdSetLineWidth(handle, line_width);
1204 }
1205
1193 void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { 1206 void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept {
1194 dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); 1207 dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology);
1195 } 1208 }
@@ -1203,6 +1216,13 @@ public:
1203 dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); 1216 dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
1204 } 1217 }
1205 1218
1219 void SetVertexInputEXT(
1220 vk::Span<VkVertexInputBindingDescription2EXT> bindings,
1221 vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept {
1222 dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(),
1223 attributes.data());
1224 }
1225
1206 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, 1226 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
1207 const VkDeviceSize* offsets, 1227 const VkDeviceSize* offsets,
1208 const VkDeviceSize* sizes) const noexcept { 1228 const VkDeviceSize* sizes) const noexcept {
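The new CommandBuffer::SetVertexInputEXT helper above ultimately forwards to vkCmdSetVertexInputEXT. A plain-Vulkan sketch of what such a call records, with made-up binding and attribute values, assuming VK_EXT_vertex_input_dynamic_state was enabled at device creation:

    // Sketch: recording dynamic vertex input state with the raw extension entry point.
    #include <vulkan/vulkan.h>

    void RecordDynamicVertexInput(VkCommandBuffer cmd, PFN_vkCmdSetVertexInputEXT set_vertex_input) {
        VkVertexInputBindingDescription2EXT binding{};
        binding.sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT;
        binding.binding = 0;
        binding.stride = 16; // example: one vec4 per vertex
        binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
        binding.divisor = 1;

        VkVertexInputAttributeDescription2EXT attribute{};
        attribute.sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT;
        attribute.location = 0;
        attribute.binding = 0;
        attribute.format = VK_FORMAT_R32G32B32A32_SFLOAT;
        attribute.offset = 0;

        // Extension commands are not exported by the loader, so the pointer comes from the
        // dispatch table populated at device creation.
        set_vertex_input(cmd, 1, &binding, 1, &attribute);
    }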
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index d72ca5acc..25b658b2a 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -64,12 +64,13 @@ void EmuThread::run() {
64 64
65 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); 65 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
66 66
67 system.Renderer().ReadRasterizer()->LoadDiskResources( 67 if (Settings::values.use_disk_shader_cache.GetValue()) {
68 system.CurrentProcess()->GetTitleID(), stop_token, 68 system.Renderer().ReadRasterizer()->LoadDiskResources(
69 [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { 69 system.CurrentProcess()->GetTitleID(), stop_token,
70 emit LoadProgress(stage, value, total); 70 [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
71 }); 71 emit LoadProgress(stage, value, total);
72 72 });
73 }
73 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 74 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
74 75
75 gpu.ReleaseContext(); 76 gpu.ReleaseContext();
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 9fe5613b6..85c37b842 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -814,7 +814,7 @@ void Config::ReadRendererValues() {
814 ReadGlobalSetting(Settings::values.use_nvdec_emulation); 814 ReadGlobalSetting(Settings::values.use_nvdec_emulation);
815 ReadGlobalSetting(Settings::values.accelerate_astc); 815 ReadGlobalSetting(Settings::values.accelerate_astc);
816 ReadGlobalSetting(Settings::values.use_vsync); 816 ReadGlobalSetting(Settings::values.use_vsync);
817 ReadGlobalSetting(Settings::values.use_assembly_shaders); 817 ReadGlobalSetting(Settings::values.shader_backend);
818 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 818 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
819 ReadGlobalSetting(Settings::values.use_fast_gpu_time); 819 ReadGlobalSetting(Settings::values.use_fast_gpu_time);
820 ReadGlobalSetting(Settings::values.use_caches_gc); 820 ReadGlobalSetting(Settings::values.use_caches_gc);
@@ -824,6 +824,8 @@ void Config::ReadRendererValues() {
824 824
825 if (global) { 825 if (global) {
826 ReadBasicSetting(Settings::values.renderer_debug); 826 ReadBasicSetting(Settings::values.renderer_debug);
827 ReadBasicSetting(Settings::values.enable_nsight_aftermath);
828 ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks);
827 } 829 }
828 830
829 qt_config->endGroup(); 831 qt_config->endGroup();
@@ -1346,7 +1348,10 @@ void Config::SaveRendererValues() {
1346 WriteGlobalSetting(Settings::values.use_nvdec_emulation); 1348 WriteGlobalSetting(Settings::values.use_nvdec_emulation);
1347 WriteGlobalSetting(Settings::values.accelerate_astc); 1349 WriteGlobalSetting(Settings::values.accelerate_astc);
1348 WriteGlobalSetting(Settings::values.use_vsync); 1350 WriteGlobalSetting(Settings::values.use_vsync);
1349 WriteGlobalSetting(Settings::values.use_assembly_shaders); 1351 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
1352 static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
1353 static_cast<u32>(Settings::values.shader_backend.GetDefault()),
1354 Settings::values.shader_backend.UsingGlobal());
1350 WriteGlobalSetting(Settings::values.use_asynchronous_shaders); 1355 WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
1351 WriteGlobalSetting(Settings::values.use_fast_gpu_time); 1356 WriteGlobalSetting(Settings::values.use_fast_gpu_time);
1352 WriteGlobalSetting(Settings::values.use_caches_gc); 1357 WriteGlobalSetting(Settings::values.use_caches_gc);
@@ -1356,6 +1361,8 @@ void Config::SaveRendererValues() {
1356 1361
1357 if (global) { 1362 if (global) {
1358 WriteBasicSetting(Settings::values.renderer_debug); 1363 WriteBasicSetting(Settings::values.renderer_debug);
1364 WriteBasicSetting(Settings::values.enable_nsight_aftermath);
1365 WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks);
1359 } 1366 }
1360 1367
1361 qt_config->endGroup(); 1368 qt_config->endGroup();
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 24950e8f8..c1d7feb9f 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -180,6 +180,7 @@ private:
180 180
181// These metatype declarations cannot be in common/settings.h because core is devoid of QT 181// These metatype declarations cannot be in common/settings.h because core is devoid of QT
182Q_DECLARE_METATYPE(Settings::CPUAccuracy); 182Q_DECLARE_METATYPE(Settings::CPUAccuracy);
183Q_DECLARE_METATYPE(Settings::RendererBackend);
184Q_DECLARE_METATYPE(Settings::GPUAccuracy); 183Q_DECLARE_METATYPE(Settings::GPUAccuracy);
185Q_DECLARE_METATYPE(Settings::FullscreenMode); 184Q_DECLARE_METATYPE(Settings::FullscreenMode);
185Q_DECLARE_METATYPE(Settings::RendererBackend);
186Q_DECLARE_METATYPE(Settings::ShaderBackend);
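Declaring the enums as Qt metatypes is what lets them travel inside a QVariant, for example as per-game combobox item data. A small self-contained sketch with a stand-in enum (the real declaration covers Settings::ShaderBackend and friends, as above):

    // Sketch: why Q_DECLARE_METATYPE matters — the enum can round-trip through QVariant.
    #include <QVariant>

    enum class ShaderBackend { GLSL = 0, GLASM = 1, SPIRV = 2 }; // stand-in for Settings::ShaderBackend
    Q_DECLARE_METATYPE(ShaderBackend);

    ShaderBackend RoundTrip(ShaderBackend backend) {
        const QVariant data = QVariant::fromValue(backend); // requires the metatype declaration above
        return data.value<ShaderBackend>();
    }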
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 8fceb3878..f7e29dbd7 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() {
45 ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); 45 ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue());
46 ui->enable_cpu_debugging->setEnabled(runtime_lock); 46 ui->enable_cpu_debugging->setEnabled(runtime_lock);
47 ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); 47 ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue());
48 ui->enable_nsight_aftermath->setEnabled(runtime_lock);
49 ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue());
48 ui->disable_macro_jit->setEnabled(runtime_lock); 50 ui->disable_macro_jit->setEnabled(runtime_lock);
49 ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); 51 ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue());
52 ui->disable_loop_safety_checks->setEnabled(runtime_lock);
53 ui->disable_loop_safety_checks->setChecked(
54 Settings::values.disable_shader_loop_safety_checks.GetValue());
50 ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); 55 ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue());
51} 56}
52 57
@@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() {
61 Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); 66 Settings::values.use_auto_stub = ui->use_auto_stub->isChecked();
62 Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); 67 Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
63 Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); 68 Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
69 Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
70 Settings::values.disable_shader_loop_safety_checks =
71 ui->disable_loop_safety_checks->isChecked();
64 Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); 72 Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
65 Settings::values.extended_logging = ui->extended_logging->isChecked(); 73 Settings::values.extended_logging = ui->extended_logging->isChecked();
66 Debugger::ToggleConsole(); 74 Debugger::ToggleConsole();
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index 1260ad6f0..c8baf2921 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -126,6 +126,16 @@
126 </widget> 126 </widget>
127 </item> 127 </item>
128 <item> 128 <item>
129 <widget class="QCheckBox" name="enable_nsight_aftermath">
130 <property name="toolTip">
131 <string>When checked, it enables Nsight Aftermath crash dumps</string>
132 </property>
133 <property name="text">
134 <string>Enable Nsight Aftermath</string>
135 </property>
136 </widget>
137 </item>
138 <item>
129 <widget class="QCheckBox" name="disable_macro_jit"> 139 <widget class="QCheckBox" name="disable_macro_jit">
130 <property name="enabled"> 140 <property name="enabled">
131 <bool>true</bool> 141 <bool>true</bool>
@@ -138,6 +148,16 @@
138 </property> 148 </property>
139 </widget> 149 </widget>
140 </item> 150 </item>
151 <item>
152 <widget class="QCheckBox" name="disable_loop_safety_checks">
153 <property name="toolTip">
154 <string>When checked, it executes shaders without loop logic changes</string>
155 </property>
156 <property name="text">
157 <string>Disable Loop safety checks</string>
158 </property>
159 </widget>
160 </item>
141 </layout> 161 </layout>
142 </widget> 162 </widget>
143 </item> 163 </item>
@@ -252,11 +272,17 @@
252 <tabstops> 272 <tabstops>
253 <tabstop>log_filter_edit</tabstop> 273 <tabstop>log_filter_edit</tabstop>
254 <tabstop>toggle_console</tabstop> 274 <tabstop>toggle_console</tabstop>
275 <tabstop>extended_logging</tabstop>
255 <tabstop>open_log_button</tabstop> 276 <tabstop>open_log_button</tabstop>
256 <tabstop>homebrew_args_edit</tabstop> 277 <tabstop>homebrew_args_edit</tabstop>
257 <tabstop>enable_graphics_debugging</tabstop> 278 <tabstop>enable_graphics_debugging</tabstop>
279 <tabstop>enable_nsight_aftermath</tabstop>
280 <tabstop>disable_macro_jit</tabstop>
281 <tabstop>disable_loop_safety_checks</tabstop>
258 <tabstop>reporting_services</tabstop> 282 <tabstop>reporting_services</tabstop>
259 <tabstop>quest_flag</tabstop> 283 <tabstop>quest_flag</tabstop>
284 <tabstop>use_debug_asserts</tabstop>
285 <tabstop>use_auto_stub</tabstop>
260 </tabstops> 286 </tabstops>
261 <resources/> 287 <resources/>
262 <connections/> 288 <connections/>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 6287a3caa..4a5b17740 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -26,19 +26,29 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
26 26
27 ui->setupUi(this); 27 ui->setupUi(this);
28 28
29 for (const auto& device : vulkan_devices) {
30 ui->device->addItem(device);
31 }
32
33 ui->backend->addItem(QStringLiteral("GLSL"));
34 ui->backend->addItem(tr("GLASM (NVIDIA Only)"));
35 ui->backend->addItem(QStringLiteral("SPIR-V (Experimental, Mesa Only)"));
36
29 SetupPerGameUI(); 37 SetupPerGameUI();
30 38
31 SetConfiguration(); 39 SetConfiguration();
32 40
33 connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { 41 connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] {
34 UpdateDeviceComboBox(); 42 UpdateAPILayout();
35 if (!Settings::IsConfiguringGlobal()) { 43 if (!Settings::IsConfiguringGlobal()) {
36 ConfigurationShared::SetHighlight( 44 ConfigurationShared::SetHighlight(
37 ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); 45 ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX);
38 } 46 }
39 }); 47 });
40 connect(ui->device, qOverload<int>(&QComboBox::activated), this, 48 connect(ui->device, qOverload<int>(&QComboBox::activated), this,
41 [this](int device) { UpdateDeviceSelection(device); }); 49 [this](int device) { UpdateDeviceSelection(device); });
50 connect(ui->backend, qOverload<int>(&QComboBox::activated), this,
51 [this](int backend) { UpdateShaderBackendSelection(backend); });
42 52
43 connect(ui->bg_button, &QPushButton::clicked, this, [this] { 53 connect(ui->bg_button, &QPushButton::clicked, this, [this] {
44 const QColor new_bg_color = QColorDialog::getColor(bg_color); 54 const QColor new_bg_color = QColorDialog::getColor(bg_color);
@@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) {
61 } 71 }
62} 72}
63 73
74void ConfigureGraphics::UpdateShaderBackendSelection(int backend) {
75 if (backend == -1) {
76 return;
77 }
78 if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) {
79 shader_backend = static_cast<Settings::ShaderBackend>(backend);
80 }
81}
82
64ConfigureGraphics::~ConfigureGraphics() = default; 83ConfigureGraphics::~ConfigureGraphics() = default;
65 84
66void ConfigureGraphics::SetConfiguration() { 85void ConfigureGraphics::SetConfiguration() {
67 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); 86 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
68 87
69 ui->api->setEnabled(runtime_lock); 88 ui->api_widget->setEnabled(runtime_lock);
70 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); 89 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
71 ui->use_disk_shader_cache->setEnabled(runtime_lock); 90 ui->use_disk_shader_cache->setEnabled(runtime_lock);
72 ui->use_nvdec_emulation->setEnabled(runtime_lock); 91 ui->use_nvdec_emulation->setEnabled(runtime_lock);
@@ -84,7 +103,7 @@ void ConfigureGraphics::SetConfiguration() {
84 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); 103 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
85 } else { 104 } else {
86 ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); 105 ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
87 ConfigurationShared::SetHighlight(ui->api_layout, 106 ConfigurationShared::SetHighlight(ui->api_widget,
88 !Settings::values.renderer_backend.UsingGlobal()); 107 !Settings::values.renderer_backend.UsingGlobal());
89 108
90 ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, 109 ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
@@ -101,11 +120,10 @@ void ConfigureGraphics::SetConfiguration() {
101 ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); 120 ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal());
102 ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); 121 ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal());
103 } 122 }
104
105 UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), 123 UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(),
106 Settings::values.bg_green.GetValue(), 124 Settings::values.bg_green.GetValue(),
107 Settings::values.bg_blue.GetValue())); 125 Settings::values.bg_blue.GetValue()));
108 UpdateDeviceComboBox(); 126 UpdateAPILayout();
109} 127}
110 128
111void ConfigureGraphics::ApplyConfiguration() { 129void ConfigureGraphics::ApplyConfiguration() {
@@ -129,6 +147,9 @@ void ConfigureGraphics::ApplyConfiguration() {
129 if (Settings::values.renderer_backend.UsingGlobal()) { 147 if (Settings::values.renderer_backend.UsingGlobal()) {
130 Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); 148 Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
131 } 149 }
150 if (Settings::values.shader_backend.UsingGlobal()) {
151 Settings::values.shader_backend.SetValue(shader_backend);
152 }
132 if (Settings::values.vulkan_device.UsingGlobal()) { 153 if (Settings::values.vulkan_device.UsingGlobal()) {
133 Settings::values.vulkan_device.SetValue(vulkan_device); 154 Settings::values.vulkan_device.SetValue(vulkan_device);
134 } 155 }
@@ -140,15 +161,22 @@ void ConfigureGraphics::ApplyConfiguration() {
140 } else { 161 } else {
141 if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { 162 if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
142 Settings::values.renderer_backend.SetGlobal(true); 163 Settings::values.renderer_backend.SetGlobal(true);
164 Settings::values.shader_backend.SetGlobal(true);
143 Settings::values.vulkan_device.SetGlobal(true); 165 Settings::values.vulkan_device.SetGlobal(true);
144 } else { 166 } else {
145 Settings::values.renderer_backend.SetGlobal(false); 167 Settings::values.renderer_backend.SetGlobal(false);
146 Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); 168 Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
147 if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { 169 switch (GetCurrentGraphicsBackend()) {
170 case Settings::RendererBackend::OpenGL:
171 Settings::values.shader_backend.SetGlobal(false);
172 Settings::values.vulkan_device.SetGlobal(true);
173 Settings::values.shader_backend.SetValue(shader_backend);
174 break;
175 case Settings::RendererBackend::Vulkan:
176 Settings::values.shader_backend.SetGlobal(true);
148 Settings::values.vulkan_device.SetGlobal(false); 177 Settings::values.vulkan_device.SetGlobal(false);
149 Settings::values.vulkan_device.SetValue(vulkan_device); 178 Settings::values.vulkan_device.SetValue(vulkan_device);
150 } else { 179 break;
151 Settings::values.vulkan_device.SetGlobal(true);
152 } 180 }
153 } 181 }
154 182
@@ -189,32 +217,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
189 ui->bg_button->setIcon(color_icon); 217 ui->bg_button->setIcon(color_icon);
190} 218}
191 219
192void ConfigureGraphics::UpdateDeviceComboBox() { 220void ConfigureGraphics::UpdateAPILayout() {
193 ui->device->clear();
194
195 bool enabled = false;
196
197 if (!Settings::IsConfiguringGlobal() && 221 if (!Settings::IsConfiguringGlobal() &&
198 ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { 222 ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
223 vulkan_device = Settings::values.vulkan_device.GetValue(true);
224 shader_backend = Settings::values.shader_backend.GetValue(true);
225 ui->device_widget->setEnabled(false);
226 ui->backend_widget->setEnabled(false);
227 } else {
199 vulkan_device = Settings::values.vulkan_device.GetValue(); 228 vulkan_device = Settings::values.vulkan_device.GetValue();
229 shader_backend = Settings::values.shader_backend.GetValue();
230 ui->device_widget->setEnabled(true);
231 ui->backend_widget->setEnabled(true);
200 } 232 }
233
201 switch (GetCurrentGraphicsBackend()) { 234 switch (GetCurrentGraphicsBackend()) {
202 case Settings::RendererBackend::OpenGL: 235 case Settings::RendererBackend::OpenGL:
203 ui->device->addItem(tr("OpenGL Graphics Device")); 236 ui->backend->setCurrentIndex(static_cast<u32>(shader_backend));
204 enabled = false; 237 ui->device_widget->setVisible(false);
238 ui->backend_widget->setVisible(true);
205 break; 239 break;
206 case Settings::RendererBackend::Vulkan: 240 case Settings::RendererBackend::Vulkan:
207 for (const auto& device : vulkan_devices) {
208 ui->device->addItem(device);
209 }
210 ui->device->setCurrentIndex(vulkan_device); 241 ui->device->setCurrentIndex(vulkan_device);
211 enabled = !vulkan_devices.empty(); 242 ui->device_widget->setVisible(true);
243 ui->backend_widget->setVisible(false);
212 break; 244 break;
213 } 245 }
214 // If in per-game config and use global is selected, don't enable.
215 enabled &= !(!Settings::IsConfiguringGlobal() &&
216 ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX);
217 ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
218} 246}
219 247
220void ConfigureGraphics::RetrieveVulkanDevices() try { 248void ConfigureGraphics::RetrieveVulkanDevices() try {
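UpdateAPILayout above swaps which row is shown: the Vulkan device picker for the Vulkan backend and the shader-backend picker (GLSL/GLASM/SPIR-V) for OpenGL. A minimal Qt sketch of that toggle with hypothetical widget names rather than the generated ui-> members:

    // Sketch of the per-API row toggle (hypothetical widget names).
    #include <QComboBox>
    #include <QWidget>

    void ToggleBackendRows(const QComboBox* api, QWidget* device_row, QWidget* backend_row) {
        const bool is_vulkan = api->currentText() == QStringLiteral("Vulkan");
        device_row->setVisible(is_vulkan);    // GPU selection only applies to Vulkan
        backend_row->setVisible(!is_vulkan);  // shader backend selection only applies to OpenGL
    }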
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index 6418115cf..c866b911b 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -34,8 +34,9 @@ private:
34 void SetConfiguration(); 34 void SetConfiguration();
35 35
36 void UpdateBackgroundColorButton(QColor color); 36 void UpdateBackgroundColorButton(QColor color);
37 void UpdateDeviceComboBox(); 37 void UpdateAPILayout();
38 void UpdateDeviceSelection(int device); 38 void UpdateDeviceSelection(int device);
39 void UpdateShaderBackendSelection(int backend);
39 40
40 void RetrieveVulkanDevices(); 41 void RetrieveVulkanDevices();
41 42
@@ -53,4 +54,5 @@ private:
53 54
54 std::vector<QString> vulkan_devices; 55 std::vector<QString> vulkan_devices;
55 u32 vulkan_device{}; 56 u32 vulkan_device{};
57 Settings::ShaderBackend shader_backend{};
56}; 58};
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 5b999d84d..099ddbb7c 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -23,7 +23,7 @@
23 </property> 23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_3"> 24 <layout class="QVBoxLayout" name="verticalLayout_3">
25 <item> 25 <item>
26 <widget class="QWidget" name="api_layout" native="true"> 26 <widget class="QWidget" name="api_widget" native="true">
27 <layout class="QGridLayout" name="gridLayout"> 27 <layout class="QGridLayout" name="gridLayout">
28 <property name="leftMargin"> 28 <property name="leftMargin">
29 <number>0</number> 29 <number>0</number>
@@ -40,37 +40,107 @@
40 <property name="horizontalSpacing"> 40 <property name="horizontalSpacing">
41 <number>6</number> 41 <number>6</number>
42 </property> 42 </property>
43 <item row="0" column="0"> 43 <item row="4" column="0">
44 <widget class="QLabel" name="api_label"> 44 <widget class="QWidget" name="backend_widget" native="true">
45 <property name="text"> 45 <layout class="QHBoxLayout" name="backend_layout">
46 <string>API:</string> 46 <property name="leftMargin">
47 </property> 47 <number>0</number>
48 </property>
49 <property name="topMargin">
50 <number>0</number>
51 </property>
52 <property name="rightMargin">
53 <number>0</number>
54 </property>
55 <property name="bottomMargin">
56 <number>0</number>
57 </property>
58 <item>
59 <widget class="QLabel" name="backend_label">
60 <property name="text">
61 <string>Shader Backend:</string>
62 </property>
63 </widget>
64 </item>
65 <item>
66 <widget class="QComboBox" name="backend"/>
67 </item>
68 </layout>
48 </widget> 69 </widget>
49 </item> 70 </item>
50 <item row="0" column="1"> 71 <item row="2" column="0">
51 <widget class="QComboBox" name="api"> 72 <widget class="QWidget" name="device_widget" native="true">
52 <item> 73 <layout class="QHBoxLayout" name="device_layout">
53 <property name="text"> 74 <property name="leftMargin">
54 <string notr="true">OpenGL</string> 75 <number>0</number>
55 </property> 76 </property>
56 </item> 77 <property name="topMargin">
57 <item> 78 <number>0</number>
58 <property name="text">
59 <string notr="true">Vulkan</string>
60 </property> 79 </property>
61 </item> 80 <property name="rightMargin">
81 <number>0</number>
82 </property>
83 <property name="bottomMargin">
84 <number>0</number>
85 </property>
86 <item>
87 <widget class="QLabel" name="device_label">
88 <property name="text">
89 <string>Device:</string>
90 </property>
91 </widget>
92 </item>
93 <item>
94 <widget class="QComboBox" name="device"/>
95 </item>
96 </layout>
62 </widget> 97 </widget>
63 </item> 98 </item>
64 <item row="1" column="0"> 99 <item row="0" column="0">
65 <widget class="QLabel" name="device_label"> 100 <widget class="QWidget" name="api_layout_2" native="true">
66 <property name="text"> 101 <layout class="QHBoxLayout" name="api_layout">
67 <string>Device:</string> 102 <property name="leftMargin">
68 </property> 103 <number>0</number>
104 </property>
105 <property name="topMargin">
106 <number>0</number>
107 </property>
108 <property name="rightMargin">
109 <number>0</number>
110 </property>
111 <property name="bottomMargin">
112 <number>0</number>
113 </property>
114 <item>
115 <widget class="QLabel" name="api_label">
116 <property name="text">
117 <string>API:</string>
118 </property>
119 </widget>
120 </item>
121 <item>
122 <widget class="QComboBox" name="api">
123 <property name="sizePolicy">
124 <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
125 <horstretch>0</horstretch>
126 <verstretch>0</verstretch>
127 </sizepolicy>
128 </property>
129 <item>
130 <property name="text">
131 <string notr="true">OpenGL</string>
132 </property>
133 </item>
134 <item>
135 <property name="text">
136 <string notr="true">Vulkan</string>
137 </property>
138 </item>
139 </widget>
140 </item>
141 </layout>
69 </widget> 142 </widget>
70 </item> 143 </item>
71 <item row="1" column="1">
72 <widget class="QComboBox" name="device"/>
73 </item>
74 </layout> 144 </layout>
75 </widget> 145 </widget>
76 </item> 146 </item>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index e952777ab..a31b8e192 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
23void ConfigureGraphicsAdvanced::SetConfiguration() { 23void ConfigureGraphicsAdvanced::SetConfiguration() {
24 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); 24 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
25 ui->use_vsync->setEnabled(runtime_lock); 25 ui->use_vsync->setEnabled(runtime_lock);
26 ui->use_assembly_shaders->setEnabled(runtime_lock);
27 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 26 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
28 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 27 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
29 28
30 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); 29 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
31 ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
32 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); 30 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
33 ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); 31 ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
34 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); 32 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
@@ -54,8 +52,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
54 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, 52 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
55 ui->anisotropic_filtering_combobox); 53 ui->anisotropic_filtering_combobox);
56 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); 54 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
57 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders,
58 ui->use_assembly_shaders, use_assembly_shaders);
59 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 55 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
60 ui->use_asynchronous_shaders, 56 ui->use_asynchronous_shaders,
61 use_asynchronous_shaders); 57 use_asynchronous_shaders);
@@ -82,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
82 if (Settings::IsConfiguringGlobal()) { 78 if (Settings::IsConfiguringGlobal()) {
83 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); 79 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
84 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); 80 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
85 ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal());
86 ui->use_asynchronous_shaders->setEnabled( 81 ui->use_asynchronous_shaders->setEnabled(
87 Settings::values.use_asynchronous_shaders.UsingGlobal()); 82 Settings::values.use_asynchronous_shaders.UsingGlobal());
88 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 83 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -94,8 +89,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
94 } 89 }
95 90
96 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); 91 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
97 ConfigurationShared::SetColoredTristate(
98 ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders);
99 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, 92 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
100 Settings::values.use_asynchronous_shaders, 93 Settings::values.use_asynchronous_shaders,
101 use_asynchronous_shaders); 94 use_asynchronous_shaders);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index 9148aacf2..7356e6916 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -35,7 +35,6 @@ private:
35 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; 35 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
36 36
37 ConfigurationShared::CheckState use_vsync; 37 ConfigurationShared::CheckState use_vsync;
38 ConfigurationShared::CheckState use_assembly_shaders;
39 ConfigurationShared::CheckState use_asynchronous_shaders; 38 ConfigurationShared::CheckState use_asynchronous_shaders;
40 ConfigurationShared::CheckState use_fast_gpu_time; 39 ConfigurationShared::CheckState use_fast_gpu_time;
41 ConfigurationShared::CheckState use_caches_gc; 40 ConfigurationShared::CheckState use_caches_gc;
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index ad0840355..379dc5d2e 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -77,22 +77,12 @@
77 </widget> 77 </widget>
78 </item> 78 </item>
79 <item> 79 <item>
80 <widget class="QCheckBox" name="use_assembly_shaders">
81 <property name="toolTip">
82 <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
83 </property>
84 <property name="text">
85 <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
86 </property>
87 </widget>
88 </item>
89 <item>
90 <widget class="QCheckBox" name="use_asynchronous_shaders"> 80 <widget class="QCheckBox" name="use_asynchronous_shaders">
91 <property name="toolTip"> 81 <property name="toolTip">
92 <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> 82 <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
93 </property> 83 </property>
94 <property name="text"> 84 <property name="text">
95 <string>Use asynchronous shader building (experimental)</string> 85 <string>Use asynchronous shader building</string>
96 </property> 86 </property>
97 </widget> 87 </widget>
98 </item> 88 </item>
@@ -144,22 +134,22 @@
144 </item> 134 </item>
145 <item> 135 <item>
146 <property name="text"> 136 <property name="text">
147 <string>2x</string> 137 <string>2x (WILL BREAK THINGS)</string>
148 </property> 138 </property>
149 </item> 139 </item>
150 <item> 140 <item>
151 <property name="text"> 141 <property name="text">
152 <string>4x</string> 142 <string>4x (WILL BREAK THINGS)</string>
153 </property> 143 </property>
154 </item> 144 </item>
155 <item> 145 <item>
156 <property name="text"> 146 <property name="text">
157 <string>8x</string> 147 <string>8x (WILL BREAK THINGS)</string>
158 </property> 148 </property>
159 </item> 149 </item>
160 <item> 150 <item>
161 <property name="text"> 151 <property name="text">
162 <string>16x</string> 152 <string>16x (WILL BREAK THINGS)</string>
163 </property> 153 </property>
164 </item> 154 </item>
165 </widget> 155 </widget>
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 76c063c97..f746bd85d 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -520,9 +520,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
520 QMenu* remove_menu = context_menu.addMenu(tr("Remove")); 520 QMenu* remove_menu = context_menu.addMenu(tr("Remove"));
521 QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); 521 QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
522 QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); 522 QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
523 QAction* remove_shader_cache = remove_menu->addAction(tr("Remove Shader Cache"));
524 QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); 523 QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
524 QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache"));
525 QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache"));
525 remove_menu->addSeparator(); 526 remove_menu->addSeparator();
527 QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches"));
526 QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); 528 QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
527 QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); 529 QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
528 QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); 530 QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
@@ -540,6 +542,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
540 open_transferable_shader_cache->setVisible(program_id != 0); 542 open_transferable_shader_cache->setVisible(program_id != 0);
541 remove_update->setVisible(program_id != 0); 543 remove_update->setVisible(program_id != 0);
542 remove_dlc->setVisible(program_id != 0); 544 remove_dlc->setVisible(program_id != 0);
545 remove_gl_shader_cache->setVisible(program_id != 0);
546 remove_vk_shader_cache->setVisible(program_id != 0);
543 remove_shader_cache->setVisible(program_id != 0); 547 remove_shader_cache->setVisible(program_id != 0);
544 remove_all_content->setVisible(program_id != 0); 548 remove_all_content->setVisible(program_id != 0);
545 auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); 549 auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
@@ -569,8 +573,14 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
569 connect(remove_dlc, &QAction::triggered, [this, program_id]() { 573 connect(remove_dlc, &QAction::triggered, [this, program_id]() {
570 emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent); 574 emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent);
571 }); 575 });
576 connect(remove_gl_shader_cache, &QAction::triggered, [this, program_id, path]() {
577 emit RemoveFileRequested(program_id, GameListRemoveTarget::GlShaderCache, path);
578 });
579 connect(remove_vk_shader_cache, &QAction::triggered, [this, program_id, path]() {
580 emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path);
581 });
572 connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() { 582 connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() {
573 emit RemoveFileRequested(program_id, GameListRemoveTarget::ShaderCache, path); 583 emit RemoveFileRequested(program_id, GameListRemoveTarget::AllShaderCache, path);
574 }); 584 });
575 connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { 585 connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() {
576 emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); 586 emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path);
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index c9a9f4654..10339dcca 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -41,7 +41,9 @@ enum class GameListOpenTarget {
41}; 41};
42 42
43enum class GameListRemoveTarget { 43enum class GameListRemoveTarget {
44 ShaderCache, 44 GlShaderCache,
45 VkShaderCache,
46 AllShaderCache,
45 CustomConfiguration, 47 CustomConfiguration,
46}; 48};
47 49
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 96a301dda..f848b2982 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -789,41 +789,28 @@ void GMainWindow::InitializeWidgets() {
789 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); 789 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
790 statusBar()->insertPermanentWidget(0, dock_status_button); 790 statusBar()->insertPermanentWidget(0, dock_status_button);
791 791
792 // Setup ASync button 792 gpu_accuracy_button = new QPushButton();
793 async_status_button = new QPushButton(); 793 gpu_accuracy_button->setObjectName(QStringLiteral("GPUStatusBarButton"));
794 async_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); 794 gpu_accuracy_button->setCheckable(true);
795 async_status_button->setFocusPolicy(Qt::NoFocus); 795 gpu_accuracy_button->setFocusPolicy(Qt::NoFocus);
796 connect(async_status_button, &QPushButton::clicked, [&] { 796 connect(gpu_accuracy_button, &QPushButton::clicked, [this] {
797 if (emulation_running) { 797 switch (Settings::values.gpu_accuracy.GetValue()) {
798 return; 798 case Settings::GPUAccuracy::High: {
799 Settings::values.gpu_accuracy.SetValue(Settings::GPUAccuracy::Normal);
800 break;
801 }
802 case Settings::GPUAccuracy::Normal:
803 case Settings::GPUAccuracy::Extreme:
804 default: {
805 Settings::values.gpu_accuracy.SetValue(Settings::GPUAccuracy::High);
799 } 806 }
800 Settings::values.use_asynchronous_gpu_emulation.SetValue(
801 !Settings::values.use_asynchronous_gpu_emulation.GetValue());
802 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
803 Core::System::GetInstance().ApplySettings();
804 });
805 async_status_button->setText(tr("ASYNC"));
806 async_status_button->setCheckable(true);
807 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
808
809 // Setup Multicore button
810 multicore_status_button = new QPushButton();
811 multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
812 multicore_status_button->setFocusPolicy(Qt::NoFocus);
813 connect(multicore_status_button, &QPushButton::clicked, [&] {
814 if (emulation_running) {
815 return;
816 } 807 }
817 Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); 808
818 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
819 Core::System::GetInstance().ApplySettings(); 809 Core::System::GetInstance().ApplySettings();
810 UpdateGPUAccuracyButton();
820 }); 811 });
821 multicore_status_button->setText(tr("MULTICORE")); 812 UpdateGPUAccuracyButton();
822 multicore_status_button->setCheckable(true); 813 statusBar()->insertPermanentWidget(0, gpu_accuracy_button);
823 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
824
825 statusBar()->insertPermanentWidget(0, multicore_status_button);
826 statusBar()->insertPermanentWidget(0, async_status_button);
827 814
828 // Setup Renderer API button 815 // Setup Renderer API button
829 renderer_status_button = new QPushButton(); 816 renderer_status_button = new QPushButton();
@@ -1401,8 +1388,6 @@ void GMainWindow::BootGame(const QString& filename, u64 program_id, std::size_t
1401 game_list_placeholder->hide(); 1388 game_list_placeholder->hide();
1402 } 1389 }
1403 status_bar_update_timer.start(500); 1390 status_bar_update_timer.start(500);
1404 async_status_button->setDisabled(true);
1405 multicore_status_button->setDisabled(true);
1406 renderer_status_button->setDisabled(true); 1391 renderer_status_button->setDisabled(true);
1407 1392
1408 if (UISettings::values.hide_mouse || Settings::values.mouse_panning) { 1393 if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
@@ -1506,8 +1491,6 @@ void GMainWindow::ShutdownGame() {
1506 emu_speed_label->setVisible(false); 1491 emu_speed_label->setVisible(false);
1507 game_fps_label->setVisible(false); 1492 game_fps_label->setVisible(false);
1508 emu_frametime_label->setVisible(false); 1493 emu_frametime_label->setVisible(false);
1509 async_status_button->setEnabled(true);
1510 multicore_status_button->setEnabled(true);
1511 renderer_status_button->setEnabled(true); 1494 renderer_status_button->setEnabled(true);
1512 1495
1513 emulation_running = false; 1496 emulation_running = false;
@@ -1654,35 +1637,15 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
1654 1637
1655void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) { 1638void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
1656 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); 1639 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
1657 const auto transferable_shader_cache_folder_path = shader_cache_dir / "opengl" / "transferable"; 1640 const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)};
1658 const auto transferable_shader_cache_file_path = 1641 if (!Common::FS::CreateDirs(shader_cache_folder_path)) {
1659 transferable_shader_cache_folder_path / fmt::format("{:016X}.bin", program_id);
1660
1661 if (!Common::FS::Exists(transferable_shader_cache_file_path)) {
1662 QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), 1642 QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"),
1663                         tr("A shader cache for this title does not exist.")); 1643                         tr("Failed to create the shader cache directory for this title."));
1664 return; 1644 return;
1665 } 1645 }
1666 1646 const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)};
1667 const auto qt_shader_cache_folder_path = 1647 const auto qt_shader_cache_path = QString::fromStdString(shader_path_string);
1668 QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_folder_path)); 1648 QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_path));
1669 const auto qt_shader_cache_file_path =
1670 QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_file_path));
1671
1672 // Windows supports opening a folder with selecting a specified file in explorer. On every other
1673 // OS we just open the transferable shader cache folder without preselecting the transferable
1674 // shader cache file for the selected game.
1675#if defined(Q_OS_WIN)
1676 const QString explorer = QStringLiteral("explorer");
1677 QStringList param;
1678 if (!QFileInfo(qt_shader_cache_file_path).isDir()) {
1679 param << QStringLiteral("/select,");
1680 }
1681 param << QDir::toNativeSeparators(qt_shader_cache_file_path);
1682 QProcess::startDetached(explorer, param);
1683#else
1684 QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_folder_path));
1685#endif
1686} 1649}
1687 1650
1688static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { 1651static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {
@@ -1825,8 +1788,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
1825 const std::string& game_path) { 1788 const std::string& game_path) {
1826 const QString question = [this, target] { 1789 const QString question = [this, target] {
1827 switch (target) { 1790 switch (target) {
1828 case GameListRemoveTarget::ShaderCache: 1791 case GameListRemoveTarget::GlShaderCache:
1829 return tr("Delete Transferable Shader Cache?"); 1792 return tr("Delete OpenGL Transferable Shader Cache?");
1793 case GameListRemoveTarget::VkShaderCache:
1794 return tr("Delete Vulkan Transferable Shader Cache?");
1795 case GameListRemoveTarget::AllShaderCache:
1796 return tr("Delete All Transferable Shader Caches?");
1830 case GameListRemoveTarget::CustomConfiguration: 1797 case GameListRemoveTarget::CustomConfiguration:
1831 return tr("Remove Custom Game Configuration?"); 1798 return tr("Remove Custom Game Configuration?");
1832 default: 1799 default:
@@ -1840,8 +1807,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
1840 } 1807 }
1841 1808
1842 switch (target) { 1809 switch (target) {
1843 case GameListRemoveTarget::ShaderCache: 1810 case GameListRemoveTarget::GlShaderCache:
1844 RemoveTransferableShaderCache(program_id); 1811 case GameListRemoveTarget::VkShaderCache:
1812 RemoveTransferableShaderCache(program_id, target);
1813 break;
1814 case GameListRemoveTarget::AllShaderCache:
1815 RemoveAllTransferableShaderCaches(program_id);
1845 break; 1816 break;
1846 case GameListRemoveTarget::CustomConfiguration: 1817 case GameListRemoveTarget::CustomConfiguration:
1847 RemoveCustomConfiguration(program_id, game_path); 1818 RemoveCustomConfiguration(program_id, game_path);
@@ -1849,18 +1820,27 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
1849 } 1820 }
1850} 1821}
1851 1822
1852void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { 1823void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) {
1824 const auto target_file_name = [target] {
1825 switch (target) {
1826 case GameListRemoveTarget::GlShaderCache:
1827 return "opengl.bin";
1828 case GameListRemoveTarget::VkShaderCache:
1829 return "vulkan.bin";
1830 default:
1831 return "";
1832 }
1833 }();
1853 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); 1834 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
1854 const auto transferable_shader_cache_file_path = 1835 const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id);
1855 shader_cache_dir / "opengl" / "transferable" / fmt::format("{:016X}.bin", program_id); 1836 const auto target_file = shader_cache_folder_path / target_file_name;
1856 1837
1857 if (!Common::FS::Exists(transferable_shader_cache_file_path)) { 1838 if (!Common::FS::Exists(target_file)) {
1858 QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"), 1839 QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"),
1859 tr("A shader cache for this title does not exist.")); 1840 tr("A shader cache for this title does not exist."));
1860 return; 1841 return;
1861 } 1842 }
1862 1843 if (Common::FS::RemoveFile(target_file)) {
1863 if (Common::FS::RemoveFile(transferable_shader_cache_file_path)) {
1864 QMessageBox::information(this, tr("Successfully Removed"), 1844 QMessageBox::information(this, tr("Successfully Removed"),
1865 tr("Successfully removed the transferable shader cache.")); 1845 tr("Successfully removed the transferable shader cache."));
1866 } else { 1846 } else {
@@ -1869,6 +1849,24 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id) {
1869 } 1849 }
1870} 1850}
1871 1851
1852void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) {
1853 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
1854 const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id);
1855
1856 if (!Common::FS::Exists(program_shader_cache_dir)) {
1857 QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"),
1858 tr("A shader cache for this title does not exist."));
1859 return;
1860 }
1861 if (Common::FS::RemoveDirRecursively(program_shader_cache_dir)) {
1862 QMessageBox::information(this, tr("Successfully Removed"),
1863 tr("Successfully removed the transferable shader caches."));
1864 } else {
1865 QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"),
1866 tr("Failed to remove the transferable shader cache directory."));
1867 }
1868}
1869
1872void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) { 1870void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) {
1873 const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path)); 1871 const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path));
1874 const auto config_file_name = 1872 const auto config_file_name =
@@ -2823,7 +2821,7 @@ void GMainWindow::OnCaptureScreenshot() {
2823 QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)); 2821 QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir));
2824 const auto date = 2822 const auto date =
2825 QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd_hh-mm-ss-zzz")); 2823 QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd_hh-mm-ss-zzz"));
2826 QString filename = QStringLiteral("%1%2_%3.png") 2824 QString filename = QStringLiteral("%1/%2_%3.png")
2827 .arg(screenshot_path) 2825 .arg(screenshot_path)
2828 .arg(title_id, 16, 16, QLatin1Char{'0'}) 2826 .arg(title_id, 16, 16, QLatin1Char{'0'})
2829 .arg(date); 2827 .arg(date);
@@ -2900,13 +2898,13 @@ void GMainWindow::UpdateStatusBar() {
2900 return; 2898 return;
2901 } 2899 }
2902 2900
2903 auto results = Core::System::GetInstance().GetAndResetPerfStats(); 2901 auto& system = Core::System::GetInstance();
2904 auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); 2902 auto results = system.GetAndResetPerfStats();
2905 const auto shaders_building = shader_notify.GetShadersBuilding(); 2903 auto& shader_notify = system.GPU().ShaderNotify();
2904 const int shaders_building = shader_notify.ShadersBuilding();
2906 2905
2907 if (shaders_building != 0) { 2906 if (shaders_building > 0) {
2908 shader_building_label->setText( 2907 shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building));
2909 tr("Building: %n shader(s)", "", static_cast<int>(shaders_building)));
2910 shader_building_label->setVisible(true); 2908 shader_building_label->setVisible(true);
2911 } else { 2909 } else {
2912 shader_building_label->setVisible(false); 2910 shader_building_label->setVisible(false);
@@ -2932,12 +2930,35 @@ void GMainWindow::UpdateStatusBar() {
2932 emu_frametime_label->setVisible(true); 2930 emu_frametime_label->setVisible(true);
2933} 2931}
2934 2932
2933void GMainWindow::UpdateGPUAccuracyButton() {
2934 switch (Settings::values.gpu_accuracy.GetValue()) {
2935 case Settings::GPUAccuracy::Normal: {
2936 gpu_accuracy_button->setText(tr("GPU NORMAL"));
2937 gpu_accuracy_button->setChecked(false);
2938 break;
2939 }
2940 case Settings::GPUAccuracy::High: {
2941 gpu_accuracy_button->setText(tr("GPU HIGH"));
2942 gpu_accuracy_button->setChecked(true);
2943 break;
2944 }
2945 case Settings::GPUAccuracy::Extreme: {
2946 gpu_accuracy_button->setText(tr("GPU EXTREME"));
2947 gpu_accuracy_button->setChecked(true);
2948 break;
2949 }
2950 default: {
2951 gpu_accuracy_button->setText(tr("GPU ERROR"));
2952 gpu_accuracy_button->setChecked(true);
2953 }
2954 }
2955}
2956
2935void GMainWindow::UpdateStatusButtons() { 2957void GMainWindow::UpdateStatusButtons() {
2936 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); 2958 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
2937 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
2938 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
2939 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == 2959 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() ==
2940 Settings::RendererBackend::Vulkan); 2960 Settings::RendererBackend::Vulkan);
2961 UpdateGPUAccuracyButton();
2941} 2962}
2942 2963
2943void GMainWindow::UpdateUISettings() { 2964void GMainWindow::UpdateUISettings() {
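
For reference, a minimal sketch of the per-title transferable shader cache layout that the hunks above operate on, assuming Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) resolves to the user's shader directory as in the diff; the helper function itself is hypothetical and only illustrates the path scheme ("<shader_dir>/<title_id>/opengl.bin" or "vulkan.bin"):

    #include <filesystem>
    #include <fmt/format.h>
    #include "common/common_types.h"
    #include "common/fs/path_util.h"

    // Hypothetical helper: resolve the transferable cache file for one backend.
    // The "opengl.bin" / "vulkan.bin" names mirror those used in
    // RemoveTransferableShaderCache above.
    std::filesystem::path TransferableCachePath(u64 program_id, bool vulkan) {
        const auto shader_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
        const auto title_dir = shader_dir / fmt::format("{:016x}", program_id);
        return title_dir / (vulkan ? "vulkan.bin" : "opengl.bin");
    }
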
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index a50e5b9fe..38e66ccd0 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -282,7 +282,8 @@ private:
282 void RemoveBaseContent(u64 program_id, const QString& entry_type); 282 void RemoveBaseContent(u64 program_id, const QString& entry_type);
283 void RemoveUpdateContent(u64 program_id, const QString& entry_type); 283 void RemoveUpdateContent(u64 program_id, const QString& entry_type);
284 void RemoveAddOnContent(u64 program_id, const QString& entry_type); 284 void RemoveAddOnContent(u64 program_id, const QString& entry_type);
285 void RemoveTransferableShaderCache(u64 program_id); 285 void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target);
286 void RemoveAllTransferableShaderCaches(u64 program_id);
286 void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); 287 void RemoveCustomConfiguration(u64 program_id, const std::string& game_path);
287 std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); 288 std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
288 InstallResult InstallNSPXCI(const QString& filename); 289 InstallResult InstallNSPXCI(const QString& filename);
@@ -291,6 +292,7 @@ private:
291 void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, 292 void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {},
292 std::string_view gpu_vendor = {}); 293 std::string_view gpu_vendor = {});
293 void UpdateStatusBar(); 294 void UpdateStatusBar();
295 void UpdateGPUAccuracyButton();
294 void UpdateStatusButtons(); 296 void UpdateStatusButtons();
295 void UpdateUISettings(); 297 void UpdateUISettings();
296 void HideMouseCursor(); 298 void HideMouseCursor();
@@ -316,8 +318,7 @@ private:
316 QLabel* emu_speed_label = nullptr; 318 QLabel* emu_speed_label = nullptr;
317 QLabel* game_fps_label = nullptr; 319 QLabel* game_fps_label = nullptr;
318 QLabel* emu_frametime_label = nullptr; 320 QLabel* emu_frametime_label = nullptr;
319 QPushButton* async_status_button = nullptr; 321 QPushButton* gpu_accuracy_button = nullptr;
320 QPushButton* multicore_status_button = nullptr;
321 QPushButton* renderer_status_button = nullptr; 322 QPushButton* renderer_status_button = nullptr;
322 QPushButton* dock_status_button = nullptr; 323 QPushButton* dock_status_button = nullptr;
323 QTimer status_bar_update_timer; 324 QTimer status_bar_update_timer;
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 3e22fee37..640d7d111 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -444,6 +444,8 @@ void Config::ReadValues() {
444 // Renderer 444 // Renderer
445 ReadSetting("Renderer", Settings::values.renderer_backend); 445 ReadSetting("Renderer", Settings::values.renderer_backend);
446 ReadSetting("Renderer", Settings::values.renderer_debug); 446 ReadSetting("Renderer", Settings::values.renderer_debug);
447 ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);
448 ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks);
447 ReadSetting("Renderer", Settings::values.vulkan_device); 449 ReadSetting("Renderer", Settings::values.vulkan_device);
448 450
449 ReadSetting("Renderer", Settings::values.fullscreen_mode); 451 ReadSetting("Renderer", Settings::values.fullscreen_mode);
@@ -456,7 +458,7 @@ void Config::ReadValues() {
456 ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); 458 ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation);
457 ReadSetting("Renderer", Settings::values.use_vsync); 459 ReadSetting("Renderer", Settings::values.use_vsync);
458 ReadSetting("Renderer", Settings::values.disable_fps_limit); 460 ReadSetting("Renderer", Settings::values.disable_fps_limit);
459 ReadSetting("Renderer", Settings::values.use_assembly_shaders); 461 ReadSetting("Renderer", Settings::values.shader_backend);
460 ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); 462 ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
461 ReadSetting("Renderer", Settings::values.use_nvdec_emulation); 463 ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
462 ReadSetting("Renderer", Settings::values.accelerate_astc); 464 ReadSetting("Renderer", Settings::values.accelerate_astc);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 88d33ecab..b7115b06a 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -221,6 +221,14 @@ backend =
221# 0 (default): Disabled, 1: Enabled 221# 0 (default): Disabled, 1: Enabled
222debug = 222debug =
223 223
224# Enable Nsight Aftermath crash dumps
225# 0 (default): Disabled, 1: Enabled
226nsight_aftermath =
227
228# Disable shader loop safety checks, executing the shader without loop logic changes
229# 0 (default): Disabled, 1: Enabled
230disable_shader_loop_safety_checks =
231
224# Which Vulkan physical device to use (defaults to 0) 232# Which Vulkan physical device to use (defaults to 0)
225vulkan_device = 233vulkan_device =
226 234
@@ -240,9 +248,10 @@ max_anisotropy =
240# 0 (default): Off, 1: On 248# 0 (default): Off, 1: On
241use_vsync = 249use_vsync =
242 250
243# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. 251# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is
244# 0: Off, 1 (default): On 252# not available and GLASM is selected, GLSL will be used.
245use_assembly_shaders = 253# 0: GLSL, 1 (default): GLASM, 2: SPIR-V
254shader_backend =
246 255
247# Whether to allow asynchronous shader building. 256# Whether to allow asynchronous shader building.
248# 0 (default): Off, 1: On 257# 0 (default): Off, 1: On
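
As a rough illustration of the shader_backend mapping described in the ini comment above (0: GLSL, 1: GLASM, 2: SPIR-V, with GLASM falling back to GLSL when NV_gpu_program5 is unavailable), a minimal sketch follows; the enum names mirror Settings::ShaderBackend from this change set, while the helper function and its has_nv_gpu_program5 parameter are hypothetical:

    #include "common/settings.h"

    // Hypothetical helper: pick the effective OpenGL shader backend for a device.
    // Falls back to GLSL when GLASM is requested but NV_gpu_program5 is missing,
    // matching the fallback behaviour documented in the ini comment above.
    Settings::ShaderBackend EffectiveBackend(bool has_nv_gpu_program5) {
        const auto requested = Settings::values.shader_backend.GetValue();
        if (requested == Settings::ShaderBackend::GLASM && !has_nv_gpu_program5) {
            return Settings::ShaderBackend::GLSL;
        }
        return requested;
    }
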
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index ac4ea88d3..35ce23696 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -218,9 +218,11 @@ int main(int argc, char** argv) {
218 // Core is loaded, start the GPU (makes the GPU contexts current to this thread) 218 // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
219 system.GPU().Start(); 219 system.GPU().Start();
220 220
221 system.Renderer().ReadRasterizer()->LoadDiskResources( 221 if (Settings::values.use_disk_shader_cache.GetValue()) {
222 system.CurrentProcess()->GetTitleID(), std::stop_token{}, 222 system.Renderer().ReadRasterizer()->LoadDiskResources(
223 [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); 223 system.CurrentProcess()->GetTitleID(), std::stop_token{},
224 [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
225 }
224 226
225 void(system.Run()); 227 void(system.Run());
226 while (emu_window->IsOpen()) { 228 while (emu_window->IsOpen()) {